diff options
Diffstat (limited to 'fs/nfs')
45 files changed, 1853 insertions, 1373 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 2905479f214a..0ca370d23ddb 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -381,7 +381,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) struct blk_plug plug; int i; - dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + dprintk("%s enter, %zu@%lld\n", __func__, count, offset); /* At this point, header->page_aray is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 6de15709d024..2ae676f93e6b 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -141,8 +141,7 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd) void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) { - if (cd->u.pipefs.dir) - sunrpc_cache_unregister_pipefs(cd); + sunrpc_cache_unregister_pipefs(cd); } void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca..773774531aff 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -9,6 +9,7 @@ #include <linux/completion.h> #include <linux/ip.h> #include <linux/module.h> +#include <linux/sched/signal.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/nfs_fs.h> @@ -231,12 +232,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = { .svo_module = THIS_MODULE, }; -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e9aa235e9d10..f073a6d2c6a5 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -110,20 +110,52 @@ out: #if defined(CONFIG_NFS_V4_1) /* - * Lookup a layout by filehandle. + * Lookup a layout inode by stateid * - * Note: gets a refcount on the layout hdr and on its respective inode. - * Caller must put the layout hdr and the inode. + * Note: returns a refcount on the inode and superblock + */ +static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, + const nfs4_stateid *stateid) +{ + struct nfs_server *server; + struct inode *inode; + struct pnfs_layout_hdr *lo; + +restart: + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (stateid != NULL && + !nfs4_stateid_match_other(stateid, &lo->plh_stateid)) + continue; + inode = igrab(lo->plh_inode); + if (!inode) + continue; + if (!nfs_sb_active(inode->i_sb)) { + rcu_read_lock(); + spin_unlock(&clp->cl_lock); + iput(inode); + spin_lock(&clp->cl_lock); + goto restart; + } + return inode; + } + } + + return NULL; +} + +/* + * Lookup a layout inode by filehandle. + * + * Note: returns a refcount on the inode and superblock * - * TODO: keep track of all layouts (and delegations) in a hash table - * hashed by filehandle. */ -static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, - struct nfs_fh *fh) +static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp, + const struct nfs_fh *fh) { struct nfs_server *server; struct nfs_inode *nfsi; - struct inode *ino; + struct inode *inode; struct pnfs_layout_hdr *lo; restart: @@ -134,37 +166,38 @@ restart: continue; if (nfsi->layout != lo) continue; - ino = igrab(lo->plh_inode); - if (!ino) - break; - spin_lock(&ino->i_lock); - /* Is this layout in the process of being freed? */ - if (nfsi->layout != lo) { - spin_unlock(&ino->i_lock); - iput(ino); + inode = igrab(lo->plh_inode); + if (!inode) + continue; + if (!nfs_sb_active(inode->i_sb)) { + rcu_read_lock(); + spin_unlock(&clp->cl_lock); + iput(inode); + spin_lock(&clp->cl_lock); goto restart; } - pnfs_get_layout_hdr(lo); - spin_unlock(&ino->i_lock); - return lo; + return inode; } } return NULL; } -static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, - struct nfs_fh *fh) +static struct inode *nfs_layout_find_inode(struct nfs_client *clp, + const struct nfs_fh *fh, + const nfs4_stateid *stateid) { - struct pnfs_layout_hdr *lo; + struct inode *inode; spin_lock(&clp->cl_lock); rcu_read_lock(); - lo = get_layout_by_fh_locked(clp, fh); + inode = nfs_layout_find_inode_by_stateid(clp, stateid); + if (!inode) + inode = nfs_layout_find_inode_by_fh(clp, fh); rcu_read_unlock(); spin_unlock(&clp->cl_lock); - return lo; + return inode; } /* @@ -213,18 +246,20 @@ static u32 initiate_file_draining(struct nfs_client *clp, u32 rv = NFS4ERR_NOMATCHING_LAYOUT; LIST_HEAD(free_me_list); - lo = get_layout_by_fh(clp, &args->cbl_fh); - if (!lo) { - trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL, - &args->cbl_stateid, -rv); + ino = nfs_layout_find_inode(clp, &args->cbl_fh, &args->cbl_stateid); + if (!ino) goto out; - } - ino = lo->plh_inode; pnfs_layoutcommit_inode(ino, false); spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + if (!lo) { + spin_unlock(&ino->i_lock); + goto out; + } + pnfs_get_layout_hdr(lo); rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid); if (rv != NFS_OK) goto unlock; @@ -258,10 +293,10 @@ unlock: /* Free all lsegs that are attached to commit buckets */ nfs_commit_inode(ino, 0); pnfs_put_layout_hdr(lo); +out: trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, &args->cbl_stateid, -rv); - iput(ino); -out: + nfs_iput_and_deactive(ino); return rv; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index eb094c6011d8..d051fc3583a9 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -83,23 +83,15 @@ static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes) return p; } -static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str) +static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, + const char **str, size_t maxlen) { - __be32 *p; - - p = read_buf(xdr, 4); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *len = ntohl(*p); - - if (*len != 0) { - p = read_buf(xdr, *len); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *str = (const char *)p; - } else - *str = NULL; + ssize_t err; + err = xdr_stream_decode_opaque_inline(xdr, (void **)str, maxlen); + if (err < 0) + return cpu_to_be32(NFS4ERR_RESOURCE); + *len = err; return 0; } @@ -162,15 +154,9 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound __be32 *p; __be32 status; - status = decode_string(xdr, &hdr->taglen, &hdr->tag); + status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ); if (unlikely(status != 0)) return status; - /* We do not like overly long tags! */ - if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) { - printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", - __func__, hdr->taglen); - return htonl(NFS4ERR_RESOURCE); - } p = read_buf(xdr, 12); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); @@ -582,12 +568,8 @@ out: static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { - __be32 *p; - - p = xdr_reserve_space(xdr, 4 + len); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - xdr_encode_opaque(p, str, len); + if (unlikely(xdr_stream_encode_opaque(xdr, str, len) < 0)) + return cpu_to_be32(NFS4ERR_RESOURCE); return 0; } @@ -1083,7 +1065,8 @@ struct svc_version nfs4_callback_version1 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, - .vs_hidden = 1, + .vs_hidden = true, + .vs_need_cong_ctrl = true, }; struct svc_version nfs4_callback_version4 = { @@ -1092,5 +1075,6 @@ struct svc_version nfs4_callback_version4 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, - .vs_hidden = 1, + .vs_hidden = true, + .vs_need_cong_ctrl = true, }; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ebecfb8fba06..390ada8741bc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat return NULL; } -static bool nfs_client_init_is_complete(const struct nfs_client *clp) +/* + * Return true if @clp is done initializing, false if still working on it. + * + * Use nfs_client_init_status to check if it was successful. + */ +bool nfs_client_init_is_complete(const struct nfs_client *clp) { return clp->cl_cons_state <= NFS_CS_READY; } +EXPORT_SYMBOL_GPL(nfs_client_init_is_complete); + +/* + * Return 0 if @clp was successfully initialized, -errno otherwise. + * + * This must be called *after* nfs_client_init_is_complete() returns true, + * otherwise it will pop WARN_ON_ONCE and return -EINVAL + */ +int nfs_client_init_status(const struct nfs_client *clp) +{ + /* called without checking nfs_client_init_is_complete */ + if (clp->cl_cons_state > NFS_CS_READY) { + WARN_ON_ONCE(1); + return -EINVAL; + } + return clp->cl_cons_state; +} +EXPORT_SYMBOL_GPL(nfs_client_init_status); int nfs_wait_client_init_complete(const struct nfs_client *clp) { @@ -369,9 +392,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -struct nfs_client * -nfs_get_client(const struct nfs_client_initdata *cl_init, - rpc_authflavor_t authflavour) +struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); @@ -655,7 +676,7 @@ static int nfs_init_server(struct nfs_server *server, set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, RPC_AUTH_UNIX); + clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dff600ae0d74..d7df5e67b0c1 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -391,10 +391,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; - /* Ensure we revalidate the attributes and page cache! */ - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_REVAL_FORCED; - spin_unlock(&inode->i_lock); trace_nfs4_set_delegation(inode, res->delegation_type); out: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5f1af4cd1a33..f92ba8d6c556 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -455,14 +455,17 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) } /* - * This function is called by the lookup code to request the use of - * readdirplus to accelerate any future lookups in the same + * This function is called by the lookup and getattr code to request the + * use of readdirplus to accelerate any future lookups in the same * directory. */ -static void nfs_advise_use_readdirplus(struct inode *dir) { - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); + struct nfs_inode *nfsi = NFS_I(dir); + + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && + !list_empty(&nfsi->open_files)) + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); } /* @@ -475,9 +478,12 @@ void nfs_advise_use_readdirplus(struct inode *dir) */ void nfs_force_use_readdirplus(struct inode *dir) { - if (!list_empty(&NFS_I(dir)->open_files)) { - nfs_advise_use_readdirplus(dir); - nfs_zap_mapping(dir, dir->i_mapping); + struct nfs_inode *nfsi = NFS_I(dir); + + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && + !list_empty(&nfsi->open_files)) { + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); + invalidate_mapping_pages(dir->i_mapping, 0, -1); } } @@ -886,17 +892,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) goto out; } -static bool nfs_dir_mapping_need_revalidate(struct inode *dir) -{ - struct nfs_inode *nfsi = NFS_I(dir); - - if (nfs_attribute_cache_expired(dir)) - return true; - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - return true; - return false; -} - /* The file offset position represents the dirent entry number. A last cookie cache takes care of the common case of reading the whole directory. @@ -928,7 +923,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; - if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; @@ -1035,8 +1030,6 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, int rcu_walk) { - int ret; - if (IS_ROOT(dentry)) return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) @@ -1044,12 +1037,12 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ - if (rcu_walk) - ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir); - else - ret = nfs_revalidate_inode(NFS_SERVER(dir), dir); - if (ret < 0) - return 0; + if (nfs_mapping_need_revalidate_inode(dir)) { + if (rcu_walk) + return 0; + if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + return 0; + } if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; return 1; @@ -1161,7 +1154,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; goto out_bad; } - goto out_valid_noent; + goto out_valid; } if (is_bad_inode(inode)) { @@ -1184,6 +1177,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; goto out_zap_parent; } + nfs_advise_use_readdirplus(dir); goto out_valid; } @@ -1219,12 +1213,12 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) nfs_free_fhandle(fhandle); nfs4_label_free(label); + /* set a readdirplus hint that we had a cache miss */ + nfs_force_use_readdirplus(dir); + out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: - /* Success: notify readdir to use READDIRPLUS */ - nfs_advise_use_readdirplus(dir); - out_valid_noent: if (flags & LOOKUP_RCU) { if (parent != ACCESS_ONCE(dentry->d_parent)) return -ECHILD; @@ -1279,8 +1273,8 @@ out_error: */ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) { - int error; struct inode *inode = d_inode(dentry); + int error = 0; /* * I believe we can only get a negative dentry here in the case of a @@ -1299,7 +1293,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (nfs_mapping_need_revalidate_inode(inode)) + error = __nfs_revalidate_inode(NFS_SERVER(inode), inode); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -1424,8 +1419,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in if (IS_ERR(res)) goto out_label; - /* Success: notify readdir to use READDIRPLUS */ - nfs_advise_use_readdirplus(dir); + /* Notify readdir to use READDIRPLUS */ + nfs_force_use_readdirplus(dir); no_entry: res = d_splice_alias(inode, dentry); @@ -1467,9 +1462,9 @@ static fmode_t flags_to_mode(int flags) return res; } -static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags) +static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) { - return alloc_nfs_open_context(dentry, flags_to_mode(open_flags)); + return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); } static int do_open(struct inode *inode, struct file *filp) @@ -1535,8 +1530,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, return -ENAMETOOLONG; if (open_flags & O_CREAT) { + struct nfs_server *server = NFS_SERVER(dir); + + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + mode &= ~current_umask(); + attr.ia_valid |= ATTR_MODE; - attr.ia_mode = mode & ~current_umask(); + attr.ia_mode = mode; } if (open_flags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; @@ -1554,7 +1554,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, return finish_no_open(file, dentry); } - ctx = create_nfs_open_context(dentry, open_flags); + ctx = create_nfs_open_context(dentry, open_flags, file); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; @@ -2002,6 +2002,29 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL_GPL(nfs_link); +static void +nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data) +{ + struct dentry *old_dentry = data->old_dentry; + struct dentry *new_dentry = data->new_dentry; + struct inode *old_inode = d_inode(old_dentry); + struct inode *new_inode = d_inode(new_dentry); + + nfs_mark_for_revalidate(old_inode); + + switch (task->tk_status) { + case 0: + if (new_inode != NULL) + nfs_drop_nlink(new_inode); + d_move(old_dentry, new_dentry); + nfs_set_verifier(new_dentry, + nfs_save_change_attribute(data->new_dir)); + break; + case -ENOENT: + nfs_dentry_handle_enoent(old_dentry); + } +} + /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -2032,7 +2055,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct dentry *dentry = NULL, *rehash = NULL; + struct dentry *dentry = NULL; struct rpc_task *task; int error = -EBUSY; @@ -2055,10 +2078,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the * rename, we unhash the dentry in advance. */ - if (!d_unhashed(new_dentry)) { + if (!d_unhashed(new_dentry)) d_drop(new_dentry); - rehash = new_dentry; - } if (d_count(new_dentry) > 2) { int err; @@ -2075,7 +2096,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_dentry = dentry; - rehash = NULL; new_inode = NULL; } } @@ -2084,7 +2104,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode != NULL) NFS_PROTO(new_inode)->return_delegation(new_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, + nfs_complete_rename); if (IS_ERR(task)) { error = PTR_ERR(task); goto out; @@ -2094,21 +2115,9 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error == 0) error = task->tk_status; rpc_put_task(task); - nfs_mark_for_revalidate(old_inode); out: - if (rehash) - d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); - if (!error) { - if (new_inode != NULL) - nfs_drop_nlink(new_inode); - d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, - nfs_save_change_attribute(new_dir)); - } else if (error == -ENOENT) - nfs_dentry_handle_enoent(old_dentry); - /* new dentry created? */ if (dentry) dput(dentry); @@ -2286,8 +2295,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str if (cache == NULL) goto out; /* Found an entry, is our attribute cache valid? */ - if (!nfs_attribute_cache_expired(inode) && - !(nfsi->cache_validity & NFS_INO_INVALID_ATTR)) + if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; err = -ECHILD; if (!may_block) @@ -2335,12 +2343,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, cache = NULL; if (cache == NULL) goto out; - err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode); - if (err) + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; + err = 0; out: rcu_read_unlock(); return err; @@ -2492,12 +2500,13 @@ EXPORT_SYMBOL_GPL(nfs_may_open); static int nfs_execute_ok(struct inode *inode, int mask) { struct nfs_server *server = NFS_SERVER(inode); - int ret; + int ret = 0; - if (mask & MAY_NOT_BLOCK) - ret = nfs_revalidate_inode_rcu(server, inode); - else - ret = nfs_revalidate_inode(server, inode); + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) { + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + ret = __nfs_revalidate_inode(server, inode); + } if (ret == 0 && !execute_ok(inode)) ret = -EACCES; return ret; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bd81bcf3ffcf..aab32fc3d6a8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -52,7 +52,7 @@ #include <linux/nfs_page.h> #include <linux/sunrpc/clnt.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/atomic.h> #include "internal.h" @@ -105,7 +105,7 @@ struct nfs_direct_req { static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); +static void nfs_direct_write_complete(struct nfs_direct_req *dreq); static void nfs_direct_write_schedule_work(struct work_struct *work); static inline void get_dreq(struct nfs_direct_req *dreq) @@ -684,7 +684,7 @@ out_failed: } if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, dreq->inode); + nfs_direct_write_complete(dreq); } static void nfs_direct_commit_complete(struct nfs_commit_data *data) @@ -717,7 +717,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) } if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_direct_write_complete(dreq, data->inode); + nfs_direct_write_complete(dreq); } static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, @@ -768,7 +768,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) } } -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +static void nfs_direct_write_complete(struct nfs_direct_req *dreq) { schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } @@ -824,7 +824,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) out_put: if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, hdr->inode); + nfs_direct_write_complete(dreq); hdr->release(hdr); } @@ -953,7 +953,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, } if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, dreq->inode); + nfs_direct_write_complete(dreq); return 0; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9ea85ae23c32..668213984d68 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -29,7 +29,7 @@ #include <linux/gfp.h> #include <linux/swap.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "delegation.h" #include "internal.h" @@ -101,18 +101,11 @@ EXPORT_SYMBOL_GPL(nfs_file_release); static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_inode *nfsi = NFS_I(inode); - - if (nfs_have_delegated_attributes(inode)) - goto out_noreval; if (filp->f_flags & O_DIRECT) goto force_reval; - if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - goto force_reval; - if (nfs_attribute_timeout(inode)) + if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE)) goto force_reval; -out_noreval: return 0; force_reval: return __nfs_revalidate_inode(server, inode); @@ -374,7 +367,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, */ if (!PageUptodate(page)) { unsigned pglen = nfs_page_length(page); - unsigned end = offset + len; + unsigned end = offset + copied; if (pglen == 0) { zero_user_segments(page, 0, offset, @@ -535,10 +528,10 @@ const struct address_space_operations nfs_file_aops = { * writable, implying that someone is about to modify the page through a * shared-writable mapping */ -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int nfs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct file *filp = vma->vm_file; + struct file *filp = vmf->vma->vm_file; struct inode *inode = file_inode(filp); unsigned pagelen; int ret = VM_FAULT_NOPAGE; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a3fc48ba4931..acd30baca461 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -202,10 +202,10 @@ static int filelayout_async_handle_error(struct rpc_task *task, task->tk_status); nfs4_mark_deviceid_unavailable(devid); pnfs_error_mark_layout_for_return(inode, lseg); - pnfs_set_lo_fail(lseg); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: + pnfs_set_lo_fail(lseg); reset: dprintk("%s Retry through MDS. Error %d\n", __func__, task->tk_status); @@ -305,7 +305,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } hdr->pgio_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + if (nfs4_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) @@ -403,7 +403,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + if (nfs4_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) @@ -438,7 +438,7 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - nfs41_setup_sequence(wdata->ds_clp->cl_session, + nfs4_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, task); @@ -482,7 +482,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) u32 j, idx; struct nfs_fh *fh; - dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", __func__, hdr->inode->i_ino, hdr->args.pgbase, (size_t)hdr->args.count, offset); @@ -540,7 +540,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (IS_ERR(ds_clnt)) return PNFS_NOT_ATTEMPTED; - dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", + dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); @@ -560,6 +560,50 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) return PNFS_ATTEMPTED; } +static int +filelayout_check_deviceid(struct pnfs_layout_hdr *lo, + struct nfs4_filelayout_segment *fl, + gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *dsaddr; + int status = -EINVAL; + + /* find and reference the deviceid */ + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, + lo->plh_lc_cred, gfp_flags); + if (d == NULL) + goto out; + + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is unavailable */ + if (filelayout_test_devid_unavailable(&dsaddr->id_node)) + goto out_put; + + fl->dsaddr = dsaddr; + + if (fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %u\n", + __func__, fl->first_stripe_index); + goto out_put; + } + + if ((fl->stripe_type == STRIPE_SPARSE && + fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || + (fl->stripe_type == STRIPE_DENSE && + fl->num_fh != dsaddr->stripe_count)) { + dprintk("%s num_fh %u not valid for given packing\n", + __func__, fl->num_fh); + goto out_put; + } + status = 0; +out: + return status; +out_put: + nfs4_fl_put_deviceid(dsaddr); + goto out; +} + /* * filelayout_check_layout() * @@ -572,11 +616,8 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { - struct nfs4_deviceid_node *d; - struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; dprintk("--> %s\n", __func__); @@ -601,41 +642,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } - /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id, - lo->plh_lc_cred, gfp_flags); - if (d == NULL) - goto out; - - dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - /* Found deviceid is unavailable */ - if (filelayout_test_devid_unavailable(&dsaddr->id_node)) - goto out_put; - - fl->dsaddr = dsaddr; - - if (fl->first_stripe_index >= dsaddr->stripe_count) { - dprintk("%s Bad first_stripe_index %u\n", - __func__, fl->first_stripe_index); - goto out_put; - } - - if ((fl->stripe_type == STRIPE_SPARSE && - fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || - (fl->stripe_type == STRIPE_DENSE && - fl->num_fh != dsaddr->stripe_count)) { - dprintk("%s num_fh %u not valid for given packing\n", - __func__, fl->num_fh); - goto out_put; - } - status = 0; out: dprintk("--> %s returns %d\n", __func__, status); return status; -out_put: - nfs4_fl_put_deviceid(dsaddr); - goto out; } static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) @@ -657,7 +667,6 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { struct xdr_stream stream; @@ -682,9 +691,9 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (unlikely(!p)) goto out_err; - memcpy(id, p, sizeof(*id)); + memcpy(&fl->deviceid, p, sizeof(fl->deviceid)); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); - nfs4_print_deviceid(id); + nfs4_print_deviceid(&fl->deviceid); nfl_util = be32_to_cpup(p++); if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) @@ -831,15 +840,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, { struct nfs4_filelayout_segment *fl; int rc; - struct nfs4_deviceid id; dprintk("--> %s\n", __func__); fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } @@ -888,18 +896,51 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return min(stripe_unit - (unsigned int)stripe_offset, size); } +static struct pnfs_layout_segment * +fl_pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + bool strict_iomode, + gfp_t gfp_flags) +{ + struct pnfs_layout_segment *lseg = NULL; + struct pnfs_layout_hdr *lo; + struct nfs4_filelayout_segment *fl; + int status; + + lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, + gfp_flags); + if (!lseg) + lseg = ERR_PTR(-ENOMEM); + if (IS_ERR(lseg)) + goto out; + + lo = NFS_I(ino)->layout; + fl = FILELAYOUT_LSEG(lseg); + + status = filelayout_check_deviceid(lo, fl, gfp_flags); + if (status) + lseg = ERR_PTR(status); +out: + if (IS_ERR(lseg)) + pnfs_put_lseg(lseg); + return lseg; +} + static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_READ, - false, - GFP_KERNEL); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + false, + GFP_KERNEL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -919,13 +960,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, int status; if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_RW, - false, - GFP_NOFS); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + false, + GFP_NOFS); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 2896cb833a11..79323b5dab0c 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -55,15 +55,16 @@ struct nfs4_file_layout_dsaddr { }; struct nfs4_filelayout_segment { - struct pnfs_layout_segment generic_hdr; - u32 stripe_type; - u32 commit_through_mds; - u32 stripe_unit; - u32 first_stripe_index; - u64 pattern_offset; - struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ - unsigned int num_fh; - struct nfs_fh **fh_array; + struct pnfs_layout_segment generic_hdr; + u32 stripe_type; + u32 commit_through_mds; + u32 stripe_unit; + u32 first_stripe_index; + u64 pattern_offset; + struct nfs4_deviceid deviceid; + struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ + unsigned int num_fh; + struct nfs_fh **fh_array; }; struct nfs4_filelayout { diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4946ef40ba87..d913e818858f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs4_pnfs_ds *ret = ds; struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); + int status; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", @@ -277,13 +278,18 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) if (ds->ds_clp) goto out_test_devid; - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, 4, - s->nfs_client->cl_minorversion, - s->nfs_client->cl_rpcclient->cl_auth->au_flavor); + s->nfs_client->cl_minorversion); + if (status) { + nfs4_mark_deviceid_unavailable(devid); + ret = NULL; + goto out; + } out_test_devid: - if (filelayout_test_devid_unavailable(devid)) + if (ret->ds_clp == NULL || + filelayout_test_devid_unavailable(devid)) ret = NULL; out: return ret; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98ace127bf86..42dedf2d625f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -25,9 +25,20 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) +#define FF_LAYOUTRETURN_MAXERR 20 + static struct group_info *ff_zero_group; +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr); +static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, + struct nfs42_layoutstat_devinfo *devinfo, + int dev_limit); +static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, + const struct nfs42_layoutstat_devinfo *devinfo, + struct nfs4_ff_layout_mirror *mirror); + static struct pnfs_layout_hdr * ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) { @@ -172,7 +183,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo, spin_lock(&inode->i_lock); list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { - if (mirror->mirror_ds != pos->mirror_ds) + if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0) continue; if (!ff_mirror_match_fh(mirror, pos)) continue; @@ -349,19 +360,6 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) } } -static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) -{ - struct nfs4_deviceid_node *node; - int i; - - if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) - return; - for (i = 0; i < fls->mirror_array_cnt; i++) { - node = &fls->mirror_array[i]->mirror_ds->id_node; - clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); - } -} - static struct pnfs_layout_segment * ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_layoutget_res *lgr, @@ -415,8 +413,6 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, for (i = 0; i < fls->mirror_array_cnt; i++) { struct nfs4_ff_layout_mirror *mirror; - struct nfs4_deviceid devid; - struct nfs4_deviceid_node *idnode; struct auth_cred acred = { .group_info = ff_zero_group }; struct rpc_cred __rcu *cred; u32 ds_count, fh_count, id; @@ -441,24 +437,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, fls->mirror_array[i]->ds_count = ds_count; /* deviceid */ - rc = decode_deviceid(&stream, &devid); + rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid); if (rc) goto out_err_free; - idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), - &devid, lh->plh_lc_cred, - gfp_flags); - /* - * upon success, mirror_ds is allocated by previous - * getdeviceinfo, or newly by .alloc_deviceid_node - * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure - */ - if (idnode) - fls->mirror_array[i]->mirror_ds = - FF_LAYOUT_MIRROR_DS(idnode); - else - goto out_err_free; - /* efficiency */ rc = -EIO; p = xdr_inline_decode(&stream, 4); @@ -556,8 +538,6 @@ out_sort_mirrors: rc = ff_layout_check_layout(lgr); if (rc) goto out_err_free; - ff_layout_mark_devices_valid(fls); - ret = &fls->generic_hdr; dprintk("<-- %s (success)\n", __func__); out_free_page: @@ -639,12 +619,11 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, struct nfs4_ff_layoutstat *layoutstat, ktime_t now) { - static const ktime_t notime = {0}; s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL; struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout); nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); - if (ktime_equal(mirror->start_time, notime)) + if (!mirror->start_time) mirror->start_time = now; if (mirror->report_interval != 0) report_interval = (s64)mirror->report_interval * 1000LL; @@ -702,6 +681,7 @@ nfs4_ff_layout_stat_io_start_read(struct inode *inode, spin_lock(&mirror->lock); report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); if (report) @@ -718,6 +698,7 @@ nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, requested, completed, ktime_get(), task->tk_start); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); } @@ -731,6 +712,7 @@ nfs4_ff_layout_stat_io_start_write(struct inode *inode, spin_lock(&mirror->lock); report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); if (report) @@ -750,6 +732,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, spin_lock(&mirror->lock); nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, requested, completed, ktime_get(), task->tk_start); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); } @@ -1070,9 +1053,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs_client *mds_client = mds_server->nfs_client; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* MDS state errors */ case -NFS4ERR_DELEG_REVOKED: @@ -1142,7 +1122,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: @@ -1173,9 +1154,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1191,7 +1169,8 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, default: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); } /* FIXME: Need to prevent infinite looping here. */ return -NFS4ERR_RESET_TO_PNFS; @@ -1210,6 +1189,13 @@ static int ff_layout_async_handle_error(struct rpc_task *task, { int vers = clp->cl_nfs_mod->rpc_vers->number; + if (task->tk_status >= 0) + return 0; + + /* Handle the case of an invalid layout segment */ + if (!pnfs_is_valid_lseg(lseg)) + return -NFS4ERR_RESET_TO_PNFS; + switch (vers) { case 3: return ff_layout_async_handle_error_v3(task, lseg, idx); @@ -1293,6 +1279,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) goto out_eagain; + ff_layout_read_record_layoutstats_done(task, hdr); pnfs_read_resend_pnfs(hdr); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: @@ -1398,30 +1385,14 @@ static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data) rpc_call_start(task); } -static int ff_layout_setup_sequence(struct nfs_client *ds_clp, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - if (ds_clp->cl_session) - return nfs41_setup_sequence(ds_clp->cl_session, - args, - res, - task); - return nfs40_setup_sequence(ds_clp->cl_slot_tbl, - args, - res, - task); -} - static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_setup_sequence(hdr->ds_clp, - &hdr->args.seq_args, - &hdr->res.seq_res, - task)) + if (nfs4_setup_sequence(hdr->ds_clp, + &hdr->args.seq_args, + &hdr->res.seq_res, + task)) return; if (ff_layout_read_prepare_common(task, hdr)) @@ -1592,10 +1563,10 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_setup_sequence(hdr->ds_clp, - &hdr->args.seq_args, - &hdr->res.seq_res, - task)) + if (nfs4_setup_sequence(hdr->ds_clp, + &hdr->args.seq_args, + &hdr->res.seq_res, + task)) return; if (ff_layout_write_prepare_common(task, hdr)) @@ -1681,10 +1652,10 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - if (ff_layout_setup_sequence(wdata->ds_clp, - &wdata->args.seq_args, - &wdata->res.seq_res, - task)) + if (nfs4_setup_sequence(wdata->ds_clp, + &wdata->args.seq_args, + &wdata->res.seq_res, + task)) return; ff_layout_commit_prepare_common(task, data); } @@ -1765,7 +1736,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) int vers; struct nfs_fh *fh; - dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", __func__, hdr->inode->i_ino, hdr->args.pgbase, (size_t)hdr->args.count, offset); @@ -1842,7 +1813,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) vers = nfs4_ff_layout_ds_version(lseg, idx); - dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n", + dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); @@ -1961,38 +1932,85 @@ ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d) id_node)); } -static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args) +static int ff_layout_encode_ioerr(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + const struct nfs4_flexfile_layoutreturn_args *ff_args) { - struct pnfs_layout_hdr *hdr = &flo->generic_hdr; __be32 *start; - int count = 0, ret = 0; start = xdr_reserve_space(xdr, 4); if (unlikely(!start)) return -E2BIG; + *start = cpu_to_be32(ff_args->num_errors); /* This assume we always return _ALL_ layouts */ - spin_lock(&hdr->plh_inode->i_lock); - ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range); - spin_unlock(&hdr->plh_inode->i_lock); + return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors); +} - *start = cpu_to_be32(count); +static void +encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); +} - return ret; +static void +ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs42_layoutstat_devinfo *devinfo) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); + p = xdr_reserve_space(xdr, 4*8); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + encode_opaque_fixed(xdr, devinfo->dev_id.data, NFS4_DEVICEID4_SIZE); +} + +static void +ff_layout_encode_ff_iostat(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs42_layoutstat_devinfo *devinfo) +{ + ff_layout_encode_ff_iostat_head(xdr, stateid, devinfo); + ff_layout_encode_ff_layoutupdate(xdr, devinfo, + devinfo->ld_private.data); } /* report nothing for now */ -static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args) +static void ff_layout_encode_iostats_array(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct nfs4_flexfile_layoutreturn_args *ff_args) { __be32 *p; + int i; p = xdr_reserve_space(xdr, 4); - if (likely(p)) - *p = cpu_to_be32(0); + *p = cpu_to_be32(ff_args->num_dev); + for (i = 0; i < ff_args->num_dev; i++) + ff_layout_encode_ff_iostat(xdr, + &args->layout->plh_stateid, + &ff_args->devinfo[i]); +} + +static void +ff_layout_free_iostats_array(struct nfs42_layoutstat_devinfo *devinfo, + unsigned int num_entries) +{ + unsigned int i; + + for (i = 0; i < num_entries; i++) { + if (!devinfo[i].ld_private.ops) + continue; + if (!devinfo[i].ld_private.ops->free) + continue; + devinfo[i].ld_private.ops->free(&devinfo[i].ld_private); + } } static struct nfs4_deviceid_node * @@ -2008,24 +2026,91 @@ ff_layout_alloc_deviceid_node(struct nfs_server *server, } static void -ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, - struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args) -{ - struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); +ff_layout_encode_layoutreturn(struct xdr_stream *xdr, + const void *voidargs, + const struct nfs4_xdr_opaque_data *ff_opaque) +{ + const struct nfs4_layoutreturn_args *args = voidargs; + struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data; + struct xdr_buf tmp_buf = { + .head = { + [0] = { + .iov_base = page_address(ff_args->pages[0]), + }, + }, + .buflen = PAGE_SIZE, + }; + struct xdr_stream tmp_xdr; __be32 *start; dprintk("%s: Begin\n", __func__); - start = xdr_reserve_space(xdr, 4); - BUG_ON(!start); - ff_layout_encode_ioerr(flo, xdr, args); - ff_layout_encode_iostats(flo, xdr, args); + xdr_init_encode(&tmp_xdr, &tmp_buf, NULL); + + ff_layout_encode_ioerr(&tmp_xdr, args, ff_args); + ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args); + + start = xdr_reserve_space(xdr, 4); + *start = cpu_to_be32(tmp_buf.len); + xdr_write_pages(xdr, ff_args->pages, 0, tmp_buf.len); - *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); } +static void +ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args) +{ + struct nfs4_flexfile_layoutreturn_args *ff_args; + + if (!args->data) + return; + ff_args = args->data; + args->data = NULL; + + ff_layout_free_ds_ioerr(&ff_args->errors); + ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev); + + put_page(ff_args->pages[0]); + kfree(ff_args); +} + +static const struct nfs4_xdr_opaque_ops layoutreturn_ops = { + .encode = ff_layout_encode_layoutreturn, + .free = ff_layout_free_layoutreturn, +}; + +static int +ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) +{ + struct nfs4_flexfile_layoutreturn_args *ff_args; + struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout); + + ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL); + if (!ff_args) + goto out_nomem; + ff_args->pages[0] = alloc_page(GFP_KERNEL); + if (!ff_args->pages[0]) + goto out_nomem_free; + + INIT_LIST_HEAD(&ff_args->errors); + ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout, + &args->range, &ff_args->errors, + FF_LAYOUTRETURN_MAXERR); + + spin_lock(&args->inode->i_lock); + ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, + &ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo)); + spin_unlock(&args->inode->i_lock); + + args->ld_private->ops = &layoutreturn_ops; + args->ld_private->data = ff_args; + return 0; +out_nomem_free: + kfree(ff_args); +out_nomem: + return -ENOMEM; +} + static int ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) { @@ -2146,21 +2231,18 @@ ff_layout_encode_io_latency(struct xdr_stream *xdr, } static void -ff_layout_encode_layoutstats(struct xdr_stream *xdr, - struct nfs42_layoutstat_args *args, - struct nfs42_layoutstat_devinfo *devinfo) +ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, + const struct nfs42_layoutstat_devinfo *devinfo, + struct nfs4_ff_layout_mirror *mirror) { - struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; struct nfs4_pnfs_ds_addr *da; struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; struct nfs_fh *fh = &mirror->fh_versions[0]; - __be32 *p, *start; + __be32 *p; da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); dprintk("%s: DS %s: encoding address %s\n", __func__, ds->ds_remotestr, da->da_remotestr); - /* layoutupdate length */ - start = xdr_reserve_space(xdr, 4); /* netaddr4 */ ff_layout_encode_netaddr(xdr, da); /* nfs_fh4 */ @@ -2177,42 +2259,71 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr, /* bool */ p = xdr_reserve_space(xdr, 4); *p = cpu_to_be32(false); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args, + const struct nfs4_xdr_opaque_data *opaque) +{ + struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque, + struct nfs42_layoutstat_devinfo, ld_private); + __be32 *start; + + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data); *start = cpu_to_be32((xdr->p - start - 1) * 4); } +static void +ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque) +{ + struct nfs4_ff_layout_mirror *mirror = opaque->data; + + ff_layout_put_mirror(mirror); +} + +static const struct nfs4_xdr_opaque_ops layoutstat_ops = { + .encode = ff_layout_encode_layoutstats, + .free = ff_layout_free_layoutstats, +}; + static int -ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, - struct pnfs_layout_hdr *lo, +ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, + struct nfs42_layoutstat_devinfo *devinfo, int dev_limit) { struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo); struct nfs4_ff_layout_mirror *mirror; struct nfs4_deviceid_node *dev; - struct nfs42_layoutstat_devinfo *devinfo; int i = 0; list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { if (i >= dev_limit) break; - if (!mirror->mirror_ds) + if (IS_ERR_OR_NULL(mirror->mirror_ds)) + continue; + if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) continue; /* mirror refcount put in cleanup_layoutstats */ if (!atomic_inc_not_zero(&mirror->ref)) continue; dev = &mirror->mirror_ds->id_node; - devinfo = &args->devinfo[i]; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); devinfo->offset = 0; devinfo->length = NFS4_MAX_UINT64; + spin_lock(&mirror->lock); devinfo->read_count = mirror->read_stat.io_stat.ops_completed; devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; devinfo->write_count = mirror->write_stat.io_stat.ops_completed; devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + spin_unlock(&mirror->lock); devinfo->layout_type = LAYOUT_FLEX_FILES; - devinfo->layoutstats_encode = ff_layout_encode_layoutstats; - devinfo->layout_private = mirror; + devinfo->ld_private.ops = &layoutstat_ops; + devinfo->ld_private.data = mirror; + devinfo++; i++; } return i; @@ -2222,47 +2333,27 @@ static int ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) { struct nfs4_flexfile_layout *ff_layout; - struct nfs4_ff_layout_mirror *mirror; - int dev_count = 0; + const int dev_count = PNFS_LAYOUTSTATS_MAXDEV; - spin_lock(&args->inode->i_lock); - ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); - list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { - if (atomic_read(&mirror->ref) != 0) - dev_count ++; - } - spin_unlock(&args->inode->i_lock); /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ - if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { - dprintk("%s: truncating devinfo to limit (%d:%d)\n", - __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); - dev_count = PNFS_LAYOUTSTATS_MAXDEV; - } args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO); if (!args->devinfo) return -ENOMEM; spin_lock(&args->inode->i_lock); - args->num_dev = ff_layout_mirror_prepare_stats(args, - &ff_layout->generic_hdr, dev_count); + ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); + args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, + &args->devinfo[0], dev_count); spin_unlock(&args->inode->i_lock); + if (!args->num_dev) { + kfree(args->devinfo); + args->devinfo = NULL; + return -ENOENT; + } return 0; } -static void -ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) -{ - struct nfs4_ff_layout_mirror *mirror; - int i; - - for (i = 0; i < data->args.num_dev; i++) { - mirror = data->args.devinfo[i].layout_private; - data->args.devinfo[i].layout_private = NULL; - ff_layout_put_mirror(mirror); - } -} - static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -2284,10 +2375,9 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .read_pagelist = ff_layout_read_pagelist, .write_pagelist = ff_layout_write_pagelist, .alloc_deviceid_node = ff_layout_alloc_deviceid_node, - .encode_layoutreturn = ff_layout_encode_layoutreturn, + .prepare_layoutreturn = ff_layout_prepare_layoutreturn, .sync = pnfs_nfs_generic_sync, .prepare_layoutstats = ff_layout_prepare_layoutstats, - .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 3ee0c9fcea76..98b34c9b0564 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -21,6 +21,7 @@ /* LAYOUTSTATS report interval in ms */ #define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) +#define FF_LAYOUTSTATS_MAXDEV 4 struct nfs4_ff_ds_version { u32 version; @@ -73,6 +74,7 @@ struct nfs4_ff_layout_mirror { struct list_head mirrors; u32 ds_count; u32 efficiency; + struct nfs4_deviceid devid; struct nfs4_ff_layout_ds *mirror_ds; u32 fh_versions_cnt; struct nfs_fh *fh_versions; @@ -81,12 +83,15 @@ struct nfs4_ff_layout_mirror { struct rpc_cred __rcu *rw_cred; atomic_t ref; spinlock_t lock; + unsigned long flags; struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; u32 report_interval; }; +#define NFS4_FF_MIRROR_STAT_AVAIL (0) + struct nfs4_ff_layout_segment { struct pnfs_layout_segment generic_hdr; u64 stripe_unit; @@ -103,6 +108,14 @@ struct nfs4_flexfile_layout { ktime_t last_report_time; /* Layoutstat report times */ }; +struct nfs4_flexfile_layoutreturn_args { + struct list_head errors; + struct nfs42_layoutstat_devinfo devinfo[FF_LAYOUTSTATS_MAXDEV]; + unsigned int num_errors; + unsigned int num_dev; + struct page *pages[1]; +}; + static inline struct nfs4_flexfile_layout * FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) { @@ -162,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg) static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { - return nfs4_test_deviceid_unavailable(node); + /* + * Flexfiles should never mark a DS unavailable, but if it does + * print a (ratelimited) warning as this can affect performance. + */ + if (nfs4_test_deviceid_unavailable(node)) { + u32 *p = (u32 *)node->deviceid.data; + + pr_warn_ratelimited("NFS: flexfiles layout referencing an " + "unavailable device [%x%x%x%x]\n", + p[0], p[1], p[2], p[3]); + return true; + } + return false; } static inline int @@ -180,9 +205,12 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, struct nfs4_ff_layout_mirror *mirror, u64 offset, u64 length, int status, enum nfs_opnum4 opnum, gfp_t gfp_flags); -int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, int *count, - const struct pnfs_layout_range *range); +int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head); +void ff_layout_free_ds_ioerr(struct list_head *head); +unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + struct list_head *head, + unsigned int maxnum); struct nfs_fh * nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); @@ -197,7 +225,6 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, struct inode *inode); struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, struct rpc_cred *mdscred); -bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index f7a3f6b05369..457cfeb1d5c1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -20,9 +20,11 @@ static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; static unsigned int dataserver_retrans; +static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); + void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { - if (mirror_ds) + if (!IS_ERR_OR_NULL(mirror_ds)) nfs4_put_deviceid_node(&mirror_ds->id_node); } @@ -175,20 +177,41 @@ out_err: static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, struct nfs4_deviceid_node *devid) { - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); if (!ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); } static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, - struct nfs4_ff_layout_mirror *mirror) + struct nfs4_ff_layout_mirror *mirror, + bool create) { - if (mirror == NULL || mirror->mirror_ds == NULL) { - pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, - lseg); - return false; + if (mirror == NULL || IS_ERR(mirror->mirror_ds)) + goto outerr; + if (mirror->mirror_ds == NULL) { + if (create) { + struct nfs4_deviceid_node *node; + struct pnfs_layout_hdr *lh = lseg->pls_layout; + struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); + + node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), + &mirror->devid, lh->plh_lc_cred, + GFP_KERNEL); + if (node) + mirror_ds = FF_LAYOUT_MIRROR_DS(node); + + /* check for race with another call to this function */ + if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && + mirror_ds != ERR_PTR(-ENODEV)) + nfs4_put_deviceid_node(node); + } else + goto outerr; } + + if (IS_ERR(mirror->mirror_ds)) + goto outerr; + if (mirror->mirror_ds->ds == NULL) { struct nfs4_deviceid_node *devid; devid = &mirror->mirror_ds->id_node; @@ -196,15 +219,9 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, return false; } return true; -} - -static u64 -end_offset(u64 start, u64 len) -{ - u64 end; - - end = start + len; - return end >= start ? end : NFS4_MAX_UINT64; +outerr: + pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); + return false; } static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, @@ -212,8 +229,8 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, { u64 end; - end = max_t(u64, end_offset(err->offset, err->length), - end_offset(offset, length)); + end = max_t(u64, pnfs_end_offset(err->offset, err->length), + pnfs_end_offset(offset, length)); err->offset = min_t(u64, err->offset, offset); err->length = end - err->offset; } @@ -235,9 +252,9 @@ ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); if (ret != 0) return ret; - if (end_offset(e1->offset, e1->length) < e2->offset) + if (pnfs_end_offset(e1->offset, e1->length) < e2->offset) return -1; - if (e1->offset > end_offset(e2->offset, e2->length)) + if (e1->offset > pnfs_end_offset(e2->offset, e2->length)) return 1; /* If ranges overlap or are contiguous, they are the same */ return 0; @@ -263,8 +280,9 @@ ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, } /* Entries match, so merge "err" into "dserr" */ extend_ds_error(dserr, err->offset, err->length); - list_del(&err->list); + list_replace(&err->list, &dserr->list); kfree(err); + return; } list_add_tail(&dserr->list, head); @@ -331,7 +349,7 @@ nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); struct nfs_fh *fh = NULL; - if (!ff_layout_mirror_valid(lseg, mirror)) { + if (!ff_layout_mirror_valid(lseg, mirror, false)) { pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", __func__, mirror_idx); goto out; @@ -370,8 +388,9 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; + int status; - if (!ff_layout_mirror_valid(lseg, mirror)) { + if (!ff_layout_mirror_valid(lseg, mirror, true)) { pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", __func__, ds_idx); goto out; @@ -390,11 +409,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. */ - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, mirror->mirror_ds->ds_versions[0].version, - mirror->mirror_ds->ds_versions[0].minor_version, - RPC_AUTH_UNIX); + mirror->mirror_ds->ds_versions[0].minor_version); /* connect success, check rsize/wsize limit */ if (ds->ds_clp) { @@ -407,11 +425,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].wsize = max_payload; goto out; } +out_fail: ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), mirror, lseg->pls_range.offset, lseg->pls_range.length, NFS4ERR_NXIO, OP_ILLEGAL, GFP_NOIO); -out_fail: if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); ds = NULL; @@ -457,28 +475,26 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, } } -static bool is_range_intersecting(u64 offset1, u64 length1, - u64 offset2, u64 length2) +void ff_layout_free_ds_ioerr(struct list_head *head) { - u64 end1 = end_offset(offset1, length1); - u64 end2 = end_offset(offset2, length2); + struct nfs4_ff_layout_ds_err *err; - return (end1 == NFS4_MAX_UINT64 || end1 > offset2) && - (end2 == NFS4_MAX_UINT64 || end2 > offset1); + while (!list_empty(head)) { + err = list_first_entry(head, + struct nfs4_ff_layout_ds_err, + list); + list_del(&err->list); + kfree(err); + } } /* called with inode i_lock held */ -int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, int *count, - const struct pnfs_layout_range *range) +int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head) { - struct nfs4_ff_layout_ds_err *err, *n; + struct nfs4_ff_layout_ds_err *err; __be32 *p; - list_for_each_entry_safe(err, n, &flo->error_list, list) { - if (!is_range_intersecting(err->offset, err->length, - range->offset, range->length)) - continue; + list_for_each_entry(err, head, list) { /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) * + array length + deviceid(NFS4_DEVICEID4_SIZE) * + status(4) + opnum(4) @@ -497,17 +513,59 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(err->status); *p++ = cpu_to_be32(err->opnum); - *count += 1; - list_del(&err->list); - dprintk("%s: offset %llu length %llu status %d op %d count %d\n", + dprintk("%s: offset %llu length %llu status %d op %d\n", __func__, err->offset, err->length, err->status, - err->opnum, *count); - kfree(err); + err->opnum); } return 0; } +static +unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + struct list_head *head, + unsigned int maxnum) +{ + struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); + struct inode *inode = lo->plh_inode; + struct nfs4_ff_layout_ds_err *err, *n; + unsigned int ret = 0; + + spin_lock(&inode->i_lock); + list_for_each_entry_safe(err, n, &flo->error_list, list) { + if (!pnfs_is_range_intersecting(err->offset, + pnfs_end_offset(err->offset, err->length), + range->offset, + pnfs_end_offset(range->offset, range->length))) + continue; + if (!maxnum) + break; + list_move(&err->list, head); + maxnum--; + ret++; + } + spin_unlock(&inode->i_lock); + return ret; +} + +unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + struct list_head *head, + unsigned int maxnum) +{ + unsigned int ret; + + ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); + /* If we're over the max, discard all remaining entries */ + if (ret == maxnum) { + LIST_HEAD(discard); + do_layout_fetch_ds_ioerr(lo, range, &discard, -1); + ff_layout_free_ds_ioerr(&discard); + } + return ret; +} + static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) { struct nfs4_ff_layout_mirror *mirror; @@ -516,7 +574,11 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); - if (mirror && mirror->mirror_ds) { + if (mirror) { + if (!mirror->mirror_ds) + return true; + if (IS_ERR(mirror->mirror_ds)) + continue; devid = &mirror->mirror_ds->id_node; if (!ff_layout_test_devid_unavailable(devid)) return true; @@ -534,8 +596,10 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); - if (!mirror || !mirror->mirror_ds) + if (!mirror || IS_ERR(mirror->mirror_ds)) return false; + if (!mirror->mirror_ds) + continue; devid = &mirror->mirror_ds->id_node; if (ff_layout_test_devid_unavailable(devid)) return false; @@ -544,7 +608,7 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; } -bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) +static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) { if (lseg->pls_range.iomode == IOMODE_READ) return ff_read_layout_has_available_ds(lseg); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index a608ffd28acc..391dafaf9182 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -30,7 +30,7 @@ #include <linux/namei.h> #include <linux/security.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "internal.h" diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf4ec5ecc97e..f489a5a71bd5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/init.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -39,7 +39,7 @@ #include <linux/compat.h> #include <linux/freezer.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "nfs4_fs.h" #include "callback.h" @@ -160,6 +160,43 @@ int nfs_sync_mapping(struct address_space *mapping) return ret; } +static int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); +} + +static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + /* Special case for the pagecache or access cache */ + if (flags == NFS_INO_REVAL_PAGECACHE && + !(cache_validity & NFS_INO_REVAL_FORCED)) + return false; + return (cache_validity & flags) != 0; +} + +static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + if ((cache_validity & flags) != 0) + return true; + if (nfs_attribute_timeout(inode)) + return true; + return false; +} + +bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) +{ + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return nfs_check_cache_invalid_delegated(inode, flags); + + return nfs_check_cache_invalid_not_delegated(inode, flags); +} + static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); @@ -634,15 +671,28 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); -static void nfs_request_parent_use_readdirplus(struct dentry *dentry) +static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) { struct dentry *parent; + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) + return; parent = dget_parent(dentry); nfs_force_use_readdirplus(d_inode(parent)); dput(parent); } +static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) +{ + struct dentry *parent; + + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) + return; + parent = dget_parent(dentry); + nfs_advise_use_readdirplus(d_inode(parent)); + dput(parent); +} + static bool nfs_need_revalidate_inode(struct inode *inode) { if (NFS_I(inode)->cache_validity & @@ -653,9 +703,10 @@ static bool nfs_need_revalidate_inode(struct inode *inode) return false; } -int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int nfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err = 0; @@ -676,17 +727,17 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is * no point in checking those. */ - if ((mnt->mnt_flags & MNT_NOATIME) || - ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) + if ((path->mnt->mnt_flags & MNT_NOATIME) || + ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; if (need_atime || nfs_need_revalidate_inode(inode)) { struct nfs_server *server = NFS_SERVER(inode); - if (server->caps & NFS_CAP_READDIRPLUS) - nfs_request_parent_use_readdirplus(dentry); + nfs_readdirplus_parent_cache_miss(path->dentry); err = __nfs_revalidate_inode(server, inode); - } + } else + nfs_readdirplus_parent_cache_hit(path->dentry); if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); @@ -702,8 +753,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { atomic_set(&l_ctx->count, 1); - l_ctx->lockowner.l_owner = current->files; - l_ctx->lockowner.l_pid = current->tgid; + l_ctx->lockowner = current->files; INIT_LIST_HEAD(&l_ctx->list); atomic_set(&l_ctx->io_count, 0); } @@ -714,9 +764,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context struct nfs_lock_context *pos = head; do { - if (pos->lockowner.l_owner != current->files) - continue; - if (pos->lockowner.l_pid != current->tgid) + if (pos->lockowner != current->files) continue; atomic_inc(&pos->count); return pos; @@ -785,6 +833,8 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) return; @@ -799,7 +849,9 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) } EXPORT_SYMBOL_GPL(nfs_close_context); -struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode) +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, + fmode_t f_mode, + struct file *filp) { struct nfs_open_context *ctx; struct rpc_cred *cred = rpc_lookup_cred(); @@ -818,6 +870,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f ctx->mode = f_mode; ctx->flags = 0; ctx->error = 0; + ctx->flock_owner = (fl_owner_t)filp; nfs_init_lock_context(&ctx->lock_context); ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); @@ -942,7 +995,7 @@ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); @@ -1031,13 +1084,6 @@ out: return status; } -int nfs_attribute_timeout(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); -} - int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) @@ -1060,15 +1106,6 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); -int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) -{ - if (!(NFS_I(inode)->cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) - && !nfs_attribute_cache_expired(inode)) - return NFS_STALE(inode) ? -ESTALE : 0; - return -ECHILD; -} - static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1099,13 +1136,10 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map return 0; } -static bool nfs_mapping_need_revalidate_inode(struct inode *inode) +bool nfs_mapping_need_revalidate_inode(struct inode *inode) { - if (nfs_have_delegated_attributes(inode)) - return false; - return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) - || NFS_STALE(inode); + return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) || + NFS_STALE(inode); } int nfs_revalidate_mapping_rcu(struct inode *inode) @@ -1317,7 +1351,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) - nfs_set_cache_invalid(inode, invalid); + nfs_set_cache_invalid(inode, invalid | NFS_INO_REVAL_FORCED); nfsi->read_cache_jiffies = fattr->time_start; return 0; @@ -1517,13 +1551,6 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr { unsigned long invalid = NFS_INO_INVALID_ATTR; - /* - * Don't revalidate the pagecache if we hold a delegation, but do - * force an attribute update - */ - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED; - if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); @@ -2015,7 +2042,7 @@ static void nfsiod_stop(void) destroy_workqueue(wq); } -int nfs_net_id; +unsigned int nfs_net_id; EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 80bcc0befb07..7b38fedb7e03 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -154,8 +154,7 @@ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); -struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, - rpc_authflavor_t); +struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); @@ -187,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); +extern bool nfs_client_init_is_complete(const struct nfs_client *clp); +extern int nfs_client_init_status(const struct nfs_client *clp); extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, @@ -194,14 +195,13 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, - u32 minor_version, - rpc_authflavor_t au_flavor); + u32 minor_version); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, - unsigned int ds_retrans, rpc_authflavor_t au_flavor); + unsigned int ds_retrans); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); @@ -346,6 +346,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); /* dir.c */ +extern void nfs_advise_use_readdirplus(struct inode *dir); extern void nfs_force_use_readdirplus(struct inode *dir); extern unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc); @@ -382,6 +383,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); +extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); extern int nfs_wait_atomic_killable(atomic_t *p); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5551e8ef67fd..786f17580582 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -178,11 +178,12 @@ out_nofree: } static int -nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +nfs_namespace_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - if (NFS_FH(d_inode(dentry))->size != 0) - return nfs_getattr(mnt, dentry, stat); - generic_fillattr(d_inode(dentry), stat); + if (NFS_FH(d_inode(path->dentry))->size != 0) + return nfs_getattr(path, stat, request_mask, query_flags); + generic_fillattr(d_inode(path->dentry), stat); return 0; } @@ -226,7 +227,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { - return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); + return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); } /** diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index fbce0d885d4c..5fbd2bde91ba 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -35,6 +35,6 @@ struct nfs_net { #endif }; -extern int nfs_net_id; +extern unsigned int nfs_net_id; #endif diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index ee753547fb0a..7879f2a0fcfd 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -78,8 +78,7 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source, */ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, - int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, - rpc_authflavor_t au_flavor) + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) { struct rpc_timeout ds_timeout; struct nfs_client *mds_clp = mds_srv->nfs_client; @@ -106,7 +105,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, /* Use the MDS nfs_client cl_ipaddr. */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init, au_flavor); + clp = nfs_get_client(&cl_init); return clp; } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 608501971fe0..1e486c73ec94 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -12,6 +12,7 @@ #include "nfs42.h" #include "iostat.h" #include "pnfs.h" +#include "nfs4session.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -128,30 +129,26 @@ out_unlock: return err; } -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, +static ssize_t _nfs42_proc_copy(struct file *src, struct nfs_lock_context *src_lock, - struct file *dst, loff_t pos_dst, + struct file *dst, struct nfs_lock_context *dst_lock, - size_t count) + struct nfs42_copy_args *args, + struct nfs42_copy_res *res) { - struct nfs42_copy_args args = { - .src_fh = NFS_FH(file_inode(src)), - .src_pos = pos_src, - .dst_fh = NFS_FH(file_inode(dst)), - .dst_pos = pos_dst, - .count = count, - }; - struct nfs42_copy_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], - .rpc_argp = &args, - .rpc_resp = &res, + .rpc_argp = args, + .rpc_resp = res, }; struct inode *dst_inode = file_inode(dst); struct nfs_server *server = NFS_SERVER(dst_inode); + loff_t pos_src = args->src_pos; + loff_t pos_dst = args->dst_pos; + size_t count = args->count; int status; - status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, + status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); if (status) return status; @@ -161,7 +158,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, if (status) return status; - status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, + status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); if (status) return status; @@ -171,22 +168,22 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, return status; status = nfs4_call_sync(server->client, server, &msg, - &args.seq_args, &res.seq_res, 0); + &args->seq_args, &res->seq_res, 0); if (status == -ENOTSUPP) server->caps &= ~NFS_CAP_COPY; if (status) return status; - if (res.write_res.verifier.committed != NFS_FILE_SYNC) { - status = nfs_commit_file(dst, &res.write_res.verifier.verifier); + if (res->write_res.verifier.committed != NFS_FILE_SYNC) { + status = nfs_commit_file(dst, &res->write_res.verifier.verifier); if (status) return status; } truncate_pagecache_range(dst_inode, pos_dst, - pos_dst + res.write_res.count); + pos_dst + res->write_res.count); - return res.write_res.count; + return res->write_res.count; } ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, @@ -196,8 +193,22 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, struct nfs_server *server = NFS_SERVER(file_inode(dst)); struct nfs_lock_context *src_lock; struct nfs_lock_context *dst_lock; - struct nfs4_exception src_exception = { }; - struct nfs4_exception dst_exception = { }; + struct nfs42_copy_args args = { + .src_fh = NFS_FH(file_inode(src)), + .src_pos = pos_src, + .dst_fh = NFS_FH(file_inode(dst)), + .dst_pos = pos_dst, + .count = count, + }; + struct nfs42_copy_res res; + struct nfs4_exception src_exception = { + .inode = file_inode(src), + .stateid = &args.src_stateid, + }; + struct nfs4_exception dst_exception = { + .inode = file_inode(dst), + .stateid = &args.dst_stateid, + }; ssize_t err, err2; if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) @@ -207,7 +218,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, if (IS_ERR(src_lock)) return PTR_ERR(src_lock); - src_exception.inode = file_inode(src); src_exception.state = src_lock->open_context->state; dst_lock = nfs_get_lock_context(nfs_file_open_context(dst)); @@ -216,15 +226,17 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, goto out_put_src_lock; } - dst_exception.inode = file_inode(dst); dst_exception.state = dst_lock->open_context->state; do { inode_lock(file_inode(dst)); - err = _nfs42_proc_copy(src, pos_src, src_lock, - dst, pos_dst, dst_lock, count); + err = _nfs42_proc_copy(src, src_lock, + dst, dst_lock, + &args, &res); inode_unlock(file_inode(dst)); + if (err >= 0) + break; if (err == -ENOTSUPP) { err = -EOPNOTSUPP; break; @@ -331,9 +343,8 @@ nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) } nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid); spin_unlock(&inode->i_lock); - nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, - &data->res.seq_res, task); - + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, task); } static void @@ -397,10 +408,13 @@ static void nfs42_layoutstat_release(void *calldata) { struct nfs42_layoutstat_data *data = calldata; - struct nfs_server *nfss = NFS_SERVER(data->args.inode); + struct nfs42_layoutstat_devinfo *devinfo = data->args.devinfo; + int i; - if (nfss->pnfs_curr_ld->cleanup_layoutstats) - nfss->pnfs_curr_ld->cleanup_layoutstats(data); + for (i = 0; i < data->args.num_dev; i++) { + if (devinfo[i].ld_private.ops && devinfo[i].ld_private.ops->free) + devinfo[i].ld_private.ops->free(&devinfo[i].ld_private); + } pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); smp_mb__before_atomic(); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 8b2605882a20..6c7296454bbc 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -181,8 +181,9 @@ static void encode_layoutstats(struct xdr_stream *xdr, NFS4_DEVICEID4_SIZE); /* Encode layoutupdate4 */ *p++ = cpu_to_be32(devinfo->layout_type); - if (devinfo->layoutstats_encode != NULL) - devinfo->layoutstats_encode(xdr, args, devinfo); + if (devinfo->ld_private.ops) + devinfo->ld_private.ops->encode(xdr, args, + &devinfo->ld_private); else encode_uint32(xdr, 0); } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1452177c822d..af285cc27ccf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -273,14 +273,6 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, fmode_t fmode); #if defined(CONFIG_NFS_V4_1) -static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) -{ - return server->nfs_client->cl_session; -} - -extern int nfs41_setup_sequence(struct nfs4_session *session, - struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task); extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *); @@ -357,11 +349,6 @@ nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, hdr->args.stable = NFS_FILE_SYNC; } #else /* CONFIG_NFS_v4_1 */ -static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) -{ - return NULL; -} - static inline bool is_ds_only_client(struct nfs_client *clp) { @@ -457,7 +444,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *, extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, - const struct nfs_lockowner *, nfs4_stateid *, + const struct nfs_lock_context *, nfs4_stateid *, struct rpc_cred **); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); @@ -466,7 +453,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); -extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl, +extern int nfs4_setup_sequence(const struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 074ac7131459..8346ccbf2d52 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -464,6 +464,11 @@ static bool nfs4_match_client_owner_id(const struct nfs_client *clp1, return strcmp(clp1->cl_owner_id, clp2->cl_owner_id) == 0; } +static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2) +{ + return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0; +} + /** * nfs40_walk_client_list - Find server that recognizes a client ID * @@ -521,7 +526,21 @@ int nfs40_walk_client_list(struct nfs_client *new, if (!nfs4_match_client_owner_id(pos, new)) continue; - + /* + * We just sent a new SETCLIENTID, which should have + * caused the server to return a new cl_confirm. So if + * cl_confirm is the same, then this is a different + * server that just returned the same cl_confirm by + * coincidence: + */ + if ((new != pos) && nfs4_same_verifier(&pos->cl_confirm, + &new->cl_confirm)) + continue; + /* + * But if the cl_confirm's are different, then the only + * way that a SETCLIENTID_CONFIRM to pos can succeed is + * if new and pos point to the same server: + */ atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -534,6 +553,7 @@ int nfs40_walk_client_list(struct nfs_client *new, break; case 0: nfs4_swap_callback_idents(pos, new); + pos->cl_confirm = new->cl_confirm; prev = NULL; *result = pos; @@ -881,7 +901,6 @@ static int nfs4_set_client(struct nfs_server *server, const struct sockaddr *addr, const size_t addrlen, const char *ip_addr, - rpc_authflavor_t authflavour, int proto, const struct rpc_timeout *timeparms, u32 minorversion, struct net *net) { @@ -907,7 +926,7 @@ static int nfs4_set_client(struct nfs_server *server, set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, authflavour); + clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) { error = PTR_ERR(clp); goto error; @@ -948,7 +967,7 @@ error: struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, - u32 minor_version, rpc_authflavor_t au_flavor) + u32 minor_version) { struct rpc_timeout ds_timeout; struct nfs_client *mds_clp = mds_srv->nfs_client; @@ -979,7 +998,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, * (section 13.1 RFC 5661). */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init, au_flavor); + clp = nfs_get_client(&cl_init); dprintk("<-- %s %p\n", __func__, clp); return clp; @@ -1004,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (server->rsize > server_resp_sz) + if (!server->rsize || server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) + if (!server->wsize || server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } @@ -1103,7 +1122,6 @@ static int nfs4_init_server(struct nfs_server *server, (const struct sockaddr *)&data->nfs_server.address, data->nfs_server.addrlen, data->client_address, - data->selected_flavor, data->nfs_server.protocol, &timeparms, data->minorversion, @@ -1200,7 +1218,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, data->addr, data->addrlen, parent_client->cl_ipaddr, - data->authflavor, rpc_protocol(parent_server->client), parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, @@ -1311,7 +1328,6 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, nfs_server_remove_lists(server); error = nfs4_set_client(server, hostname, sap, salen, buf, - clp->cl_rpcclient->cl_auth->au_flavor, clp->cl_proto, clnt->cl_timeout, clp->cl_minorversion, net); nfs_put_client(clp); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 89a77950e0b0..0efba77789b9 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -57,7 +57,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index c444285bb1b1..835c163f61af 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -316,7 +316,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, if (ret < 0) goto out_up; - payload = user_key_payload(rkey); + payload = user_key_payload_rcu(rkey); if (IS_ERR_OR_NULL(payload)) { ret = PTR_ERR(payload); goto out_up; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index d21104912676..d8b040bd9814 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, mountdata->hostname, mountdata->mnt_path); - mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); + mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); if (!IS_ERR(mnt)) break; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 241da19b7da4..201ca3f2c4ba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -38,7 +38,6 @@ #include <linux/mm.h> #include <linux/delay.h> #include <linux/errno.h> -#include <linux/file.h> #include <linux/string.h> #include <linux/ratelimit.h> #include <linux/printk.h> @@ -94,7 +93,7 @@ static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fa static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state, struct nfs4_label *ilabel, + struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel); #ifdef CONFIG_NFS_V4_1 static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, @@ -226,7 +225,6 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { static const u32 nfs4_open_noattr_bitmap[3] = { FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE | FATTR4_WORD0_FILEID, }; @@ -579,12 +577,7 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, static bool _nfs4_is_integrity_protected(struct nfs_client *clp) { rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor; - - if (flavor == RPC_AUTH_GSS_KRB5I || - flavor == RPC_AUTH_GSS_KRB5P) - return true; - - return false; + return (flavor == RPC_AUTH_GSS_KRB5I) || (flavor == RPC_AUTH_GSS_KRB5P); } static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) @@ -624,48 +617,6 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) args->sa_privileged = 1; } -int nfs40_setup_sequence(struct nfs4_slot_table *tbl, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - struct nfs4_slot *slot; - - /* slot already allocated? */ - if (res->sr_slot != NULL) - goto out_start; - - spin_lock(&tbl->slot_tbl_lock); - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - goto out_sleep; - - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - goto out_sleep; - } - spin_unlock(&tbl->slot_tbl_lock); - - slot->privileged = args->sa_privileged ? 1 : 0; - args->sa_slot = slot; - res->sr_slot = slot; - -out_start: - rpc_call_start(task); - return 0; - -out_sleep: - if (args->sa_privileged) - rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, - NULL, RPC_PRIORITY_PRIVILEGED); - else - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); - return -EAGAIN; -} -EXPORT_SYMBOL_GPL(nfs40_setup_sequence); - static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_slot *slot = res->sr_slot; @@ -880,101 +831,14 @@ int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) } EXPORT_SYMBOL_GPL(nfs4_sequence_done); -int nfs41_setup_sequence(struct nfs4_session *session, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - struct nfs4_slot *slot; - struct nfs4_slot_table *tbl; - - dprintk("--> %s\n", __func__); - /* slot already allocated? */ - if (res->sr_slot != NULL) - goto out_success; - - tbl = &session->fc_slot_table; - - task->tk_timeout = 0; - - spin_lock(&tbl->slot_tbl_lock); - if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) && - !args->sa_privileged) { - /* The state manager will wait until the slot table is empty */ - dprintk("%s session is draining\n", __func__); - goto out_sleep; - } - - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - /* If out of memory, try again in 1/4 second */ - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - dprintk("<-- %s: no free slots\n", __func__); - goto out_sleep; - } - spin_unlock(&tbl->slot_tbl_lock); - - slot->privileged = args->sa_privileged ? 1 : 0; - args->sa_slot = slot; - - dprintk("<-- %s slotid=%u seqid=%u\n", __func__, - slot->slot_nr, slot->seq_nr); - - res->sr_slot = slot; - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - /* - * sr_status is only set in decode_sequence, and so will remain - * set to 1 if an rpc level failure occurs. - */ - res->sr_status = 1; - trace_nfs4_setup_sequence(session, args); -out_success: - rpc_call_start(task); - return 0; -out_sleep: - /* Privileged tasks are queued with top priority */ - if (args->sa_privileged) - rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, - NULL, RPC_PRIORITY_PRIVILEGED); - else - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); - return -EAGAIN; -} -EXPORT_SYMBOL_GPL(nfs41_setup_sequence); - -static int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - struct nfs4_session *session = nfs4_get_session(server); - int ret = 0; - - if (!session) - return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, - args, res, task); - - dprintk("--> %s clp %p session %p sr_slot %u\n", - __func__, session->clp, session, res->sr_slot ? - res->sr_slot->slot_nr : NFS4_NO_SLOT); - - ret = nfs41_setup_sequence(session, args, res, task); - - dprintk("<-- %s status=%d\n", __func__, ret); - return ret; -} - static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs4_call_sync_data *data = calldata; - struct nfs4_session *session = nfs4_get_session(data->seq_server); dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); - nfs41_setup_sequence(session, data->seq_args, data->seq_res, task); + nfs4_setup_sequence(data->seq_server->nfs_client, + data->seq_args, data->seq_res, task); } static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) @@ -991,15 +855,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { #else /* !CONFIG_NFS_V4_1 */ -static int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, - args, res, task); -} - static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { return nfs40_sequence_done(task, res); @@ -1020,10 +875,68 @@ EXPORT_SYMBOL_GPL(nfs4_sequence_done); #endif /* !CONFIG_NFS_V4_1 */ +int nfs4_setup_sequence(const struct nfs_client *client, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + struct nfs4_session *session = nfs4_get_session(client); + struct nfs4_slot_table *tbl = client->cl_slot_tbl; + struct nfs4_slot *slot; + + /* slot already allocated? */ + if (res->sr_slot != NULL) + goto out_start; + + if (session) { + tbl = &session->fc_slot_table; + task->tk_timeout = 0; + } + + spin_lock(&tbl->slot_tbl_lock); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; + + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* Try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; + } + spin_unlock(&tbl->slot_tbl_lock); + + slot->privileged = args->sa_privileged ? 1 : 0; + args->sa_slot = slot; + + res->sr_slot = slot; + if (session) { + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; + } + + trace_nfs4_setup_sequence(session, args); +out_start: + rpc_call_start(task); + return 0; + +out_sleep: + if (args->sa_privileged) + rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, + NULL, RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; +} +EXPORT_SYMBOL_GPL(nfs4_setup_sequence); + static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs4_call_sync_data *data = calldata; - nfs4_setup_sequence(data->seq_server, + nfs4_setup_sequence(data->seq_server->nfs_client, data->seq_args, data->seq_res, task); } @@ -1080,15 +993,24 @@ int nfs4_call_sync(struct rpc_clnt *clnt, return nfs4_call_sync_sequence(clnt, server, msg, args, res); } -static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) +static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, + unsigned long timestamp) { struct nfs_inode *nfsi = NFS_I(dir); spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (!cinfo->atomic || cinfo->before != dir->i_version) + if (cinfo->atomic && cinfo->before == dir->i_version) { + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + nfsi->attrtimeo_timestamp = jiffies; + } else { nfs_force_lookup_revalidate(dir); + if (cinfo->before != dir->i_version) + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL; + } dir->i_version = cinfo->after; + nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); @@ -1221,6 +1143,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + p->o_arg.umask = current_umask(); + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); p->o_arg.share_access = nfs4_map_atomic_open_share(server, fmode, flags); /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS @@ -1228,8 +1152,16 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (!(flags & O_EXCL)) { /* ask server to check for all possible rights as results * are cached */ - p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | - NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + switch (p->o_arg.claim) { + default: + break; + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: + p->o_arg.access = NFS4_ACCESS_READ | + NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | + NFS4_ACCESS_EXECUTE; + } } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); @@ -1239,7 +1171,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; p->o_arg.label = nfs4_label_copy(p->a_label, label); - p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: @@ -1310,14 +1241,6 @@ static void nfs4_opendata_put(struct nfs4_opendata *p) kref_put(&p->kref, nfs4_opendata_free); } -static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) -{ - int ret; - - ret = rpc_wait_for_completion_task(task); - return ret; -} - static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, fmode_t fmode) { @@ -1712,17 +1635,15 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) int ret; if (!data->rpc_done) { - if (data->rpc_status) { - ret = data->rpc_status; - goto err; - } + if (data->rpc_status) + return ERR_PTR(data->rpc_status); /* cached opens have already been processed */ goto update; } ret = nfs_refresh_inode(inode, &data->f_attr); if (ret) - goto err; + return ERR_PTR(ret); if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); @@ -1732,9 +1653,6 @@ update: atomic_inc(&state->count); return state; -err: - return ERR_PTR(ret); - } static struct nfs4_state * @@ -2028,8 +1946,8 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; - nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl, - &data->c_arg.seq_args, &data->c_res.seq_res, task); + nfs4_setup_sequence(data->o_arg.server->nfs_client, + &data->c_arg.seq_args, &data->c_res.seq_res, task); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) @@ -2104,7 +2022,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status != 0) { data->cancelled = 1; smp_wmb(); @@ -2152,7 +2070,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; - if (nfs4_setup_sequence(data->o_arg.server, + if (nfs4_setup_sequence(data->o_arg.server->nfs_client, &data->o_arg.seq_args, &data->o_res.seq_res, task) != 0) @@ -2269,15 +2187,15 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) data->is_recover = 1; } task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - status = nfs4_wait_for_completion_rpc_task(task); - if (status != 0) { - data->cancelled = 1; - smp_wmb(); - } else - status = data->rpc_status; - rpc_put_task(task); + if (IS_ERR(task)) + return PTR_ERR(task); + status = rpc_wait_for_completion_task(task); + if (status != 0) { + data->cancelled = 1; + smp_wmb(); + } else + status = data->rpc_status; + rpc_put_task(task); return status; } @@ -2286,7 +2204,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) { struct inode *dir = d_inode(data->dir); struct nfs_openres *o_res = &data->o_res; - int status; + int status; status = nfs4_run_open_task(data, 1); if (status != 0 || !data->rpc_done) @@ -2294,11 +2212,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); - if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) status = _nfs4_proc_open_confirm(data); - if (status != 0) - return status; - } return status; } @@ -2343,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred, if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) return 0; - /* even though OPEN succeeded, access is denied. Close the file */ - nfs4_close_state(state, fmode); return -EACCES; } @@ -2372,11 +2285,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); if (o_arg->open_flags & O_CREAT) { - update_changeattr(dir, &o_res->cinfo); if (o_arg->open_flags & O_EXCL) data->file_created = 1; else if (o_res->cinfo.before != o_res->cinfo.after) data->file_created = 1; + if (data->file_created || dir->i_version != o_res->cinfo.after) + update_changeattr(dir, &o_res->cinfo, + o_res->f_attr->time_start); } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; @@ -2390,11 +2305,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int nfs4_recover_expired_lease(struct nfs_server *server) -{ - return nfs4_client_recover_expired_lease(server->nfs_client); -} - /* * OPEN_EXPIRED: * reclaim state on the server after a network partition. @@ -2532,17 +2442,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) } nfs4_stateid_copy(&stateid, &delegation->stateid); - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || + !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags)) { rcu_read_unlock(); nfs_finish_clear_delegation_stateid(state, &stateid); return; } - if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { - rcu_read_unlock(); - return; - } - cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); @@ -2678,7 +2585,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, sattr->ia_valid |= ATTR_MTIME; /* Except MODE, it seems harmless of setting twice. */ - if ((attrset[1] & FATTR4_WORD1_MODE)) + if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && + attrset[1] & FATTR4_WORD1_MODE) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) @@ -2707,6 +2615,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, ret = PTR_ERR(state); if (IS_ERR(state)) goto out; + ctx->state = state; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) @@ -2732,7 +2641,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - ctx->state = state; if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) @@ -2771,7 +2679,7 @@ static int _nfs4_do_open(struct inode *dir, dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out_err; } - status = nfs4_recover_expired_lease(server); + status = nfs4_client_recover_expired_lease(server->nfs_client); if (status != 0) goto err_put_state_owner; if (d_really_is_positive(dentry)) @@ -2819,7 +2727,7 @@ static int _nfs4_do_open(struct inode *dir, nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, - state, label, olabel); + ctx, label, olabel); if (status == 0) { nfs_setattr_update_inode(state->inode, sattr, opendata->o_res.f_attr); @@ -2914,15 +2822,15 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_setattrargs *arg, struct nfs_setattrres *res, struct rpc_cred *cred, - struct nfs4_state *state) + struct nfs_open_context *ctx) { struct nfs_server *server = NFS_SERVER(inode); - struct rpc_message msg = { + struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], .rpc_argp = arg, .rpc_resp = res, .rpc_cred = cred, - }; + }; struct rpc_cred *delegation_cred = NULL; unsigned long timestamp = jiffies; fmode_t fmode; @@ -2937,15 +2845,17 @@ static int _nfs4_do_setattr(struct inode *inode, if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) { /* Use that stateid */ - } else if (truncate && state != NULL) { - struct nfs_lockowner lockowner = { - .l_owner = current->files, - .l_pid = current->tgid, - }; - if (!nfs4_valid_open_stateid(state)) + } else if (truncate && ctx != NULL) { + struct nfs_lock_context *l_ctx; + if (!nfs4_valid_open_stateid(ctx->state)) return -EBADF; - if (nfs4_select_rw_stateid(state, FMODE_WRITE, &lockowner, - &arg->stateid, &delegation_cred) == -EIO) + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) + return PTR_ERR(l_ctx); + status = nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, l_ctx, + &arg->stateid, &delegation_cred); + nfs_put_lock_context(l_ctx); + if (status == -EIO) return -EBADF; } else nfs4_stateid_copy(&arg->stateid, &zero_stateid); @@ -2955,7 +2865,7 @@ static int _nfs4_do_setattr(struct inode *inode, status = nfs4_call_sync(server->client, server, &msg, &arg->seq_args, &res->seq_res, 1); put_rpccred(delegation_cred); - if (status == 0 && state != NULL) + if (status == 0 && ctx != NULL) renew_lease(server, timestamp); trace_nfs4_setattr(inode, &arg->stateid, status); return status; @@ -2963,22 +2873,23 @@ static int _nfs4_do_setattr(struct inode *inode, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state, struct nfs4_label *ilabel, + struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_setattrargs arg = { - .fh = NFS_FH(inode), - .iap = sattr, + struct nfs4_state *state = ctx ? ctx->state : NULL; + struct nfs_setattrargs arg = { + .fh = NFS_FH(inode), + .iap = sattr, .server = server, .bitmask = server->attr_bitmask, .label = ilabel, - }; - struct nfs_setattrres res = { + }; + struct nfs_setattrres res = { .fattr = fattr, .label = olabel, .server = server, - }; + }; struct nfs4_exception exception = { .state = state, .inode = inode, @@ -2991,7 +2902,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, arg.bitmask = nfs4_bitmask(server, olabel); do { - err = _nfs4_do_setattr(inode, &arg, &res, cred, state); + err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); switch (err) { case -NFS4ERR_OPENMODE: if (!(sattr->ia_valid & ATTR_SIZE)) { @@ -3028,10 +2939,15 @@ struct nfs4_closedata { struct nfs4_state *state; struct nfs_closeargs arg; struct nfs_closeres res; + struct { + struct nfs4_layoutreturn_args arg; + struct nfs4_layoutreturn_res res; + struct nfs4_xdr_opaque_data ld_private; + u32 roc_barrier; + bool roc; + } lr; struct nfs_fattr fattr; unsigned long timestamp; - bool roc; - u32 roc_barrier; }; static void nfs4_free_closedata(void *data) @@ -3040,8 +2956,9 @@ static void nfs4_free_closedata(void *data) struct nfs4_state_owner *sp = calldata->state->owner; struct super_block *sb = calldata->state->inode->i_sb; - if (calldata->roc) - pnfs_roc_release(calldata->state->inode); + if (calldata->lr.roc) + pnfs_roc_release(&calldata->lr.arg, &calldata->lr.res, + calldata->res.lr_ret); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); @@ -3060,17 +2977,50 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); - /* hmm. we are done with the inode, and in the process of freeing + + /* Handle Layoutreturn errors */ + if (calldata->arg.lr_args && task->tk_status != 0) { + switch (calldata->res.lr_ret) { + default: + calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + break; + case 0: + calldata->arg.lr_args = NULL; + calldata->res.lr_res = NULL; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_UNKNOWN_LAYOUTTYPE: + case -NFS4ERR_WRONG_CRED: + calldata->arg.lr_args = NULL; + calldata->res.lr_res = NULL; + calldata->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + } + + /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ switch (task->tk_status) { case 0: res_stateid = &calldata->res.stateid; - if (calldata->roc) - pnfs_roc_set_barrier(state->inode, - calldata->roc_barrier); renew_lease(server, calldata->timestamp); break; + case -NFS4ERR_ACCESS: + if (calldata->arg.bitmask != NULL) { + calldata->arg.bitmask = NULL; + calldata->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; + + } + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -3096,7 +3046,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); - nfs_refresh_inode(calldata->inode, calldata->res.fattr); + nfs_refresh_inode(calldata->inode, &calldata->fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); } @@ -3144,23 +3094,32 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_no_action; } - if (nfs4_wait_on_layoutreturn(inode, task)) { + if (!calldata->lr.roc && nfs4_wait_on_layoutreturn(inode, task)) { nfs_release_seqid(calldata->arg.seqid); goto out_wait; } if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; - if (calldata->roc) - pnfs_roc_get_barrier(inode, &calldata->roc_barrier); + + if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { + /* Close-to-open cache consistency revalidation */ + if (!nfs4_have_delegation(inode, FMODE_READ)) + calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; + else + calldata->arg.bitmask = NULL; + } calldata->arg.share_access = nfs4_map_atomic_open_share(NFS_SERVER(inode), calldata->arg.fmode, 0); - nfs_fattr_init(calldata->res.fattr); + if (calldata->res.fattr == NULL) + calldata->arg.bitmask = NULL; + else if (calldata->arg.bitmask == NULL) + calldata->res.fattr = NULL; calldata->timestamp = jiffies; - if (nfs4_setup_sequence(NFS_SERVER(inode), + if (nfs4_setup_sequence(NFS_SERVER(inode)->nfs_client, &calldata->arg.seq_args, &calldata->res.seq_res, task) != 0) @@ -3179,13 +3138,6 @@ static const struct rpc_call_ops nfs4_close_ops = { .rpc_release = nfs4_free_closedata, }; -static bool nfs4_roc(struct inode *inode) -{ - if (!nfs_have_layout(inode)) - return false; - return pnfs_roc(inode); -} - /* * It is possible for data to be read/written from a mem-mapped file * after the sys_close call (which hits the vfs layer as a flush). @@ -3232,12 +3184,19 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; + nfs_fattr_init(&calldata->fattr); calldata->arg.fmode = 0; - calldata->arg.bitmask = server->cache_consistency_bitmask; + calldata->lr.arg.ld_private = &calldata->lr.ld_private; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->roc = nfs4_roc(state->inode); + calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + calldata->lr.roc = pnfs_roc(state->inode, + &calldata->lr.arg, &calldata->lr.res, msg.rpc_cred); + if (calldata->lr.roc) { + calldata->arg.lr_args = &calldata->lr.arg; + calldata->res.lr_res = &calldata->lr.res; + } nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; @@ -3290,7 +3249,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -3448,16 +3407,11 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl .pseudoflavor = flavor, }; struct rpc_auth *auth; - int ret; auth = rpcauth_create(&auth_args, server->client); - if (IS_ERR(auth)) { - ret = -EACCES; - goto out; - } - ret = nfs4_lookup_root(server, fhandle, info); -out: - return ret; + if (IS_ERR(auth)) + return -EACCES; + return nfs4_lookup_root(server, fhandle, info); } /* @@ -3687,7 +3641,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, { struct inode *inode = d_inode(dentry); struct rpc_cred *cred = NULL; - struct nfs4_state *state = NULL; + struct nfs_open_context *ctx = NULL; struct nfs4_label *label = NULL; int status; @@ -3708,20 +3662,17 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, /* Search for an existing open(O_WRITE) file */ if (sattr->ia_valid & ATTR_FILE) { - struct nfs_open_context *ctx; ctx = nfs_file_open_context(sattr->ia_file); - if (ctx) { + if (ctx) cred = ctx->cred; - state = ctx->state; - } } label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); if (IS_ERR(label)) return PTR_ERR(label); - status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); + status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label); if (status == 0) { nfs_setattr_update_inode(inode, sattr, fattr); nfs_setsecurity(inode, fattr, label); @@ -3966,18 +3917,20 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_label l, *ilabel = NULL; struct nfs_open_context *ctx; struct nfs4_state *state; int status = 0; - ctx = alloc_nfs_open_context(dentry, FMODE_READ); + ctx = alloc_nfs_open_context(dentry, FMODE_READ, NULL); if (IS_ERR(ctx)) return PTR_ERR(ctx); ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, NULL); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -4004,11 +3957,12 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name) .rpc_argp = &args, .rpc_resp = &res, }; + unsigned long timestamp = jiffies; int status; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); if (status == 0) - update_changeattr(dir, &res.cinfo); + update_changeattr(dir, &res.cinfo, timestamp); return status; } @@ -4040,7 +3994,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - nfs4_setup_sequence(NFS_SB(data->dentry->d_sb), + nfs4_setup_sequence(NFS_SB(data->dentry->d_sb)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -4056,7 +4010,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) return 0; - update_changeattr(dir, &res->cinfo); + if (task->tk_status == 0) + update_changeattr(dir, &res->cinfo, res->dir_attr->time_start); return 1; } @@ -4073,7 +4028,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) { - nfs4_setup_sequence(NFS_SERVER(data->old_dir), + nfs4_setup_sequence(NFS_SERVER(data->old_dir)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -4090,8 +4045,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) return 0; - update_changeattr(old_dir, &res->old_cinfo); - update_changeattr(new_dir, &res->new_cinfo); + if (task->tk_status == 0) { + update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start); + if (new_dir != old_dir) + update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start); + } return 1; } @@ -4128,7 +4086,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { - update_changeattr(dir, &res.cinfo); + update_changeattr(dir, &res.cinfo, res.fattr->time_start); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) nfs_setsecurity(inode, res.fattr, res.label); @@ -4185,6 +4143,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->arg.attrs = sattr; data->arg.ftype = ftype; data->arg.bitmask = nfs4_bitmask(server, data->label); + data->arg.umask = current_umask(); data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; @@ -4202,7 +4161,8 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { - update_changeattr(dir, &data->res.dir_cinfo); + update_changeattr(dir, &data->res.dir_cinfo, + data->res.fattr->time_start); status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); } return status; @@ -4282,13 +4242,15 @@ out: static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_label l, *label = NULL; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); do { err = _nfs4_proc_mkdir(dir, dentry, sattr, label); trace_nfs4_mkdir(dir, &dentry->d_name, err); @@ -4391,13 +4353,15 @@ out: static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_label l, *label = NULL; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); do { err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev); trace_nfs4_mknod(dir, &dentry->d_name, err); @@ -4541,11 +4505,7 @@ int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - return nfs4_select_rw_stateid(ctx->state, fmode, lockowner, stateid, NULL); + return nfs4_select_rw_stateid(ctx->state, fmode, l_ctx, stateid, NULL); } EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); @@ -4643,7 +4603,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_header *hdr) { - if (nfs4_setup_sequence(NFS_SERVER(hdr->inode), + if (nfs4_setup_sequence(NFS_SERVER(hdr->inode)->nfs_client, &hdr->args.seq_args, &hdr->res.seq_res, task)) @@ -4742,7 +4702,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->inode), + nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -4895,8 +4855,8 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, if (newpage == NULL) goto unwind; memcpy(page_address(newpage), buf, len); - buf += len; - buflen -= len; + buf += len; + buflen -= len; *pages++ = newpage; rc++; } while (buflen != 0); @@ -4989,7 +4949,7 @@ out: */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; + struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -5003,13 +4963,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; int ret = -ENOMEM, i; - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - if (npages == 0) - npages = 1; if (npages > ARRAY_SIZE(pages)) return -ERANGE; @@ -5219,8 +5175,8 @@ static int _nfs4_do_set_security_label(struct inode *inode, struct nfs_server *server = NFS_SERVER(inode); const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; struct nfs_setattrargs arg = { - .fh = NFS_FH(inode), - .iap = &sattr, + .fh = NFS_FH(inode), + .iap = &sattr, .server = server, .bitmask = bitmask, .label = ilabel, @@ -5231,9 +5187,9 @@ static int _nfs4_do_set_security_label(struct inode *inode, .server = server, }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, }; int status; @@ -5564,11 +5520,16 @@ struct nfs4_delegreturndata { struct nfs_fh fh; nfs4_stateid stateid; unsigned long timestamp; + struct { + struct nfs4_layoutreturn_args arg; + struct nfs4_layoutreturn_res res; + struct nfs4_xdr_opaque_data ld_private; + u32 roc_barrier; + bool roc; + } lr; struct nfs_fattr fattr; int rpc_status; struct inode *inode; - bool roc; - u32 roc_barrier; }; static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) @@ -5579,6 +5540,32 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) return; trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); + + /* Handle Layoutreturn errors */ + if (data->args.lr_args && task->tk_status != 0) { + switch(data->res.lr_ret) { + default: + data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + break; + case 0: + data->args.lr_args = NULL; + data->res.lr_res = NULL; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_UNKNOWN_LAYOUTTYPE: + case -NFS4ERR_WRONG_CRED: + data->args.lr_args = NULL; + data->res.lr_res = NULL; + data->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + } + switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); @@ -5594,6 +5581,14 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_STALE_STATEID: task->tk_status = 0; break; + case -NFS4ERR_ACCESS: + if (data->args.bitmask) { + data->args.bitmask = NULL; + data->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } default: if (nfs4_async_handle_error(task, data->res.server, NULL, NULL) == -EAGAIN) { @@ -5602,8 +5597,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) } } data->rpc_status = task->tk_status; - if (data->roc && data->rpc_status == 0) - pnfs_roc_set_barrier(data->inode, data->roc_barrier); } static void nfs4_delegreturn_release(void *calldata) @@ -5612,8 +5605,10 @@ static void nfs4_delegreturn_release(void *calldata) struct inode *inode = data->inode; if (inode) { - if (data->roc) - pnfs_roc_release(inode); + if (data->lr.roc) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } kfree(calldata); @@ -5625,13 +5620,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; - if (nfs4_wait_on_layoutreturn(d_data->inode, task)) + if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) return; - if (d_data->roc) - pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier); - - nfs4_setup_sequence(d_data->res.server, + nfs4_setup_sequence(d_data->res.server->nfs_client, &d_data->args.seq_args, &d_data->res.seq_res, task); @@ -5676,12 +5668,22 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; data->res.server = server; + data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + data->lr.arg.ld_private = &data->lr.ld_private; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred); data->inode = nfs_igrab_and_active(inode); - if (data->inode) - data->roc = nfs4_roc(inode); + if (data->inode) { + if (data->lr.roc) { + data->args.lr_args = &data->lr.arg; + data->res.lr_res = &data->lr.res; + } + } else if (data->lr.roc) { + pnfs_roc_release(&data->lr.arg, &data->lr.res, 0); + data->lr.roc = false; + } task_setup_data.callback_data = data; msg.rpc_argp = &data->args; @@ -5691,14 +5693,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co return PTR_ERR(task); if (!issync) goto out; - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status != 0) goto out; status = data->rpc_status; - if (status == 0) - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); - else - nfs_refresh_inode(inode, &data->fattr); out: rpc_put_task(task); return status; @@ -5737,8 +5735,8 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_argp = &arg, + .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; struct nfs4_lock_state *lsp; @@ -5867,7 +5865,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) goto out_no_action; } calldata->timestamp = jiffies; - if (nfs4_setup_sequence(calldata->server, + if (nfs4_setup_sequence(calldata->server->nfs_client, &calldata->arg.seq_args, &calldata->res.seq_res, task) != 0) @@ -5965,7 +5963,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = PTR_ERR(task); if (IS_ERR(task)) goto out; - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); rpc_put_task(task); out: request->fl_flags = fl_flags; @@ -6015,7 +6013,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); return p; out_free_seqid: @@ -6053,7 +6050,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) goto out_release_open_seqid; } data->timestamp = jiffies; - if (nfs4_setup_sequence(data->server, + if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, &data->res.seq_res, task) == 0) @@ -6128,7 +6125,6 @@ static void nfs4_lock_release(void *calldata) nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); - fput(data->fl.fl_file); kfree(data); dprintk("%s: done!\n", __func__); } @@ -6194,7 +6190,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - ret = nfs4_wait_for_completion_rpc_task(task); + ret = rpc_wait_for_completion_task(task); if (ret == 0) { ret = data->rpc_status; if (ret) @@ -6273,8 +6269,7 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) || test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) return 0; - status = nfs4_lock_expired(state, request); - return status; + return nfs4_lock_expired(state, request); } #endif @@ -6520,8 +6515,8 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata { struct nfs_release_lockowner_data *data = calldata; struct nfs_server *server = data->server; - nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, - &data->args.seq_args, &data->res.seq_res, task); + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, task); data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->timestamp = jiffies; } @@ -7112,11 +7107,9 @@ static bool nfs41_same_server_scope(struct nfs41_server_scope *a, struct nfs41_server_scope *b) { - if (a->server_scope_sz == b->server_scope_sz && - memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) - return true; - - return false; + if (a->server_scope_sz != b->server_scope_sz) + return false; + return memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0; } static void @@ -7429,11 +7422,11 @@ static void nfs4_exchange_id_release(void *data) struct nfs41_exchange_id_data *cdata = (struct nfs41_exchange_id_data *)data; - nfs_put_client(cdata->args.client); if (cdata->xprt) { xprt_put(cdata->xprt); rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); } + nfs_put_client(cdata->args.client); kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); kfree(cdata->res.server_owner); @@ -7540,10 +7533,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - status = PTR_ERR(task); - goto out_impl_id; - } + if (IS_ERR(task)) + return PTR_ERR(task); if (!xprt) { status = rpc_wait_for_completion_task(task); @@ -7571,6 +7562,7 @@ out_server_owner: kfree(calldata->res.server_owner); out_calldata: kfree(calldata); + nfs_put_client(clp); goto out; } @@ -7711,7 +7703,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, dprintk("--> %s\n", __func__); /* just setup sequence, do not trigger session recovery since we're invoked within one */ - nfs41_setup_sequence(data->clp->cl_session, + nfs4_setup_sequence(data->clp, &data->args->la_seq_args, &data->res->lr_seq_res, task); @@ -8082,7 +8074,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - nfs41_setup_sequence(clp->cl_session, args, res, task); + nfs4_setup_sequence(clp, args, res, task); } static const struct rpc_call_ops nfs41_sequence_ops = { @@ -8170,7 +8162,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) { struct nfs4_reclaim_complete_data *calldata = data; - nfs41_setup_sequence(calldata->clp->cl_session, + nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, &calldata->res.seq_res, task); @@ -8262,7 +8254,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, status = PTR_ERR(task); goto out; } - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status == 0) status = task->tk_status; rpc_put_task(task); @@ -8277,10 +8269,9 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); - struct nfs4_session *session = nfs4_get_session(server); dprintk("--> %s\n", __func__); - nfs41_setup_sequence(session, &lgp->args.seq_args, + nfs4_setup_sequence(server->nfs_client, &lgp->args.seq_args, &lgp->res.seq_res, task); dprintk("<-- %s\n", __func__); } @@ -8371,6 +8362,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } + nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) @@ -8494,7 +8486,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status == 0) { status = nfs4_layoutget_handle_exception(task, lgp, &exception); *timeout = exception.timeout; @@ -8523,7 +8515,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - nfs41_setup_sequence(lrp->clp->cl_session, + nfs4_setup_sequence(lrp->clp, &lrp->args.seq_args, &lrp->res.seq_res, task); @@ -8559,21 +8551,13 @@ static void nfs4_layoutreturn_release(void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - LIST_HEAD(freeme); dprintk("--> %s\n", __func__); - spin_lock(&lo->plh_inode->i_lock); - if (lrp->res.lrs_present) { - pnfs_mark_matching_lsegs_invalid(lo, &freeme, - &lrp->args.range, - be32_to_cpu(lrp->args.stateid.seqid)); - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - pnfs_mark_layout_stateid_invalid(lo, &freeme); - pnfs_clear_layoutreturn_waitbit(lo); - spin_unlock(&lo->plh_inode->i_lock); + pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, + lrp->res.lrs_present ? &lrp->res.stateid : NULL); nfs4_sequence_free_slot(&lrp->res.seq_res); - pnfs_free_lseg_list(&freeme); + if (lrp->ld_private.ops && lrp->ld_private.ops->free) + lrp->ld_private.ops->free(&lrp->ld_private); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); kfree(calldata); @@ -8681,9 +8665,8 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); - struct nfs4_session *session = nfs4_get_session(server); - nfs41_setup_sequence(session, + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -9007,7 +8990,7 @@ struct nfs_free_stateid_data { static void nfs41_free_stateid_prepare(struct rpc_task *task, void *calldata) { struct nfs_free_stateid_data *data = calldata; - nfs41_setup_sequence(nfs4_get_session(data->server), + nfs4_setup_sequence(data->server->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -9119,10 +9102,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1, if (s1->seqid == s2->seqid) return true; - if (s1->seqid == 0 || s2->seqid == 0) - return true; - return false; + return s1->seqid == 0 || s2->seqid == 0; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 82e77198d17e..1f8c2ae43a8d 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -153,7 +153,7 @@ void nfs4_set_lease_period(struct nfs_client *clp, spin_unlock(&clp->cl_lock); /* Cap maximum reconnect timeout at 1/2 lease period */ - rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1); + rpc_set_connect_timeout(clp->cl_rpcclient, lease, lease >> 1); } /* diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index a61350f75c74..769b85655c4b 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -169,7 +169,7 @@ bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid) { if (slotid <= tbl->max_slotid) - return nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); + return nfs4_find_or_create_slot(tbl, slotid, 0, GFP_NOWAIT); return ERR_PTR(-E2BIG); } diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index dae385500005..dfae4880eacb 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -103,6 +103,11 @@ static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl, return !!test_bit(slotid, tbl->used_slots); } +static inline struct nfs4_session *nfs4_get_session(const struct nfs_client *clp) +{ + return clp->cl_session; +} + #if defined(CONFIG_NFS_V4_1) extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, u32 target_highest_slotid); @@ -170,6 +175,8 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) return 0; } +#define nfs_session_id_hash(session) (0) + #endif /* defined(CONFIG_NFS_V4_1) */ #endif /* IS_ENABLED(CONFIG_NFS_V4) */ #endif /* __LINUX_FS_NFS_NFS4SESSION_H */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0959c9661662..8156bad6b441 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -494,21 +494,18 @@ nfs4_alloc_state_owner(struct nfs_server *server, } static void -nfs4_drop_state_owner(struct nfs4_state_owner *sp) -{ - struct rb_node *rb_node = &sp->so_server_node; - - if (!RB_EMPTY_NODE(rb_node)) { - struct nfs_server *server = sp->so_server; - struct nfs_client *clp = server->nfs_client; - - spin_lock(&clp->cl_lock); - if (!RB_EMPTY_NODE(rb_node)) { - rb_erase(rb_node, &server->state_owners); - RB_CLEAR_NODE(rb_node); - } - spin_unlock(&clp->cl_lock); - } +nfs4_reset_state_owner(struct nfs4_state_owner *sp) +{ + /* This state_owner is no longer usable, but must + * remain in place so that state recovery can find it + * and the opens associated with it. + * It may also be used for new 'open' request to + * return a delegation to the server. + * So update the 'create_time' so that it looks like + * a new state_owner. This will cause the server to + * request an OPEN_CONFIRM to start a new sequence. + */ + sp->so_seqid.create_time = ktime_get(); } static void nfs4_free_state_owner(struct nfs4_state_owner *sp) @@ -797,19 +794,33 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) /* * Search the state->lock_states for an existing lock_owner - * that is compatible with current->files + * that is compatible with either of the given owners. + * If the second is non-zero, then the first refers to a Posix-lock + * owner (current->files) and the second refers to a flock/OFD + * owner (struct file*). In that case, prefer a match for the first + * owner. + * If both sorts of locks are held on the one file we cannot know + * which stateid was intended to be used, so a "correct" choice cannot + * be made. Failing that, a "consistent" choice is preferable. The + * consistent choice we make is to prefer the first owner, that of a + * Posix lock. */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +__nfs4_find_lock_state(struct nfs4_state *state, + fl_owner_t fl_owner, fl_owner_t fl_owner2) { - struct nfs4_lock_state *pos; + struct nfs4_lock_state *pos, *ret = NULL; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner != fl_owner) - continue; - atomic_inc(&pos->ls_count); - return pos; + if (pos->ls_owner == fl_owner) { + ret = pos; + break; + } + if (pos->ls_owner == fl_owner2) + ret = pos; } - return NULL; + if (ret) + atomic_inc(&ret->ls_count); + return ret; } /* @@ -857,7 +868,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ for(;;) { spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, owner); + lsp = __nfs4_find_lock_state(state, owner, NULL); if (lsp != NULL) break; if (new != NULL) { @@ -939,22 +950,23 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) static int nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, - const struct nfs_lockowner *lockowner) + const struct nfs_lock_context *l_ctx) { struct nfs4_lock_state *lsp; - fl_owner_t fl_owner; + fl_owner_t fl_owner, fl_flock_owner; int ret = -ENOENT; - - if (lockowner == NULL) + if (l_ctx == NULL) goto out; if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) goto out; - fl_owner = lockowner->l_owner; + fl_owner = l_ctx->lockowner; + fl_flock_owner = l_ctx->open_context->flock_owner; + spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); + lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) ret = -EIO; else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { @@ -986,7 +998,7 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) * requests. */ int nfs4_select_rw_stateid(struct nfs4_state *state, - fmode_t fmode, const struct nfs_lockowner *lockowner, + fmode_t fmode, const struct nfs_lock_context *l_ctx, nfs4_stateid *dst, struct rpc_cred **cred) { int ret; @@ -995,7 +1007,7 @@ int nfs4_select_rw_stateid(struct nfs4_state *state, return -EIO; if (cred != NULL) *cred = NULL; - ret = nfs4_copy_lock_stateid(dst, state, lockowner); + ret = nfs4_copy_lock_stateid(dst, state, l_ctx); if (ret == -EIO) /* A lost lock - don't even consider delegations */ goto out; @@ -1079,6 +1091,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_BADXDR: case -NFS4ERR_RESOURCE: case -NFS4ERR_NOFILEHANDLE: + case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; }; @@ -1098,7 +1111,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid); if (status == -NFS4ERR_BAD_SEQID) - nfs4_drop_state_owner(sp); + nfs4_reset_state_owner(sp); if (!nfs4_has_session(sp->so_server->nfs_client)) nfs_increment_seqid(status, seqid); } @@ -1717,7 +1730,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) break; case -NFS4ERR_STALE_CLIENTID: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_EXPIRED: @@ -2190,7 +2202,7 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); } - nfs4_schedule_lease_recovery(clp); + nfs4_schedule_state_manager(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index cfb8f7ce5cf6..845d0eadefc9 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -241,38 +241,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); -TRACE_EVENT(nfs4_setup_sequence, - TP_PROTO( - const struct nfs4_session *session, - const struct nfs4_sequence_args *args - ), - TP_ARGS(session, args), - - TP_STRUCT__entry( - __field(unsigned int, session) - __field(unsigned int, slot_nr) - __field(unsigned int, seq_nr) - __field(unsigned int, highest_used_slotid) - ), - - TP_fast_assign( - const struct nfs4_slot *sa_slot = args->sa_slot; - __entry->session = nfs_session_id_hash(&session->sess_id); - __entry->slot_nr = sa_slot->slot_nr; - __entry->seq_nr = sa_slot->seq_nr; - __entry->highest_used_slotid = - sa_slot->table->highest_used_slotid; - ), - TP_printk( - "session=0x%08x slot_nr=%u seq_nr=%u " - "highest_used_slotid=%u", - __entry->session, - __entry->slot_nr, - __entry->seq_nr, - __entry->highest_used_slotid - ) -); - #define show_nfs4_sequence_status_flags(status) \ __print_flags((unsigned long)status, "|", \ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ @@ -382,6 +350,38 @@ TRACE_EVENT(nfs4_cb_sequence, ); #endif /* CONFIG_NFS_V4_1 */ +TRACE_EVENT(nfs4_setup_sequence, + TP_PROTO( + const struct nfs4_session *session, + const struct nfs4_sequence_args *args + ), + TP_ARGS(session, args), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_used_slotid) + ), + + TP_fast_assign( + const struct nfs4_slot *sa_slot = args->sa_slot; + __entry->session = session ? nfs_session_id_hash(&session->sess_id) : 0; + __entry->slot_nr = sa_slot->slot_nr; + __entry->seq_nr = sa_slot->seq_nr; + __entry->highest_used_slotid = + sa_slot->table->highest_used_slotid; + ), + TP_printk( + "session=0x%08x slot_nr=%u seq_nr=%u " + "highest_used_slotid=%u", + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_used_slotid + ) +); + DECLARE_EVENT_CLASS(nfs4_open_event, TP_PROTO( const struct nfs_open_context *ctx, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fc89e5ed07ee..80ce289eea05 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,6 +52,7 @@ #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/fs_struct.h> #include "nfs4_fs.h" #include "internal.h" @@ -168,8 +169,10 @@ static int nfs4_stat_to_errno(int); open_owner_id_maxsz + \ encode_opentype_maxsz + \ encode_claim_null_maxsz) +#define decode_space_limit_maxsz (3) #define decode_ace_maxsz (3 + nfs4_owner_maxsz) #define decode_delegation_maxsz (1 + decode_stateid_maxsz + 1 + \ + decode_space_limit_maxsz + \ decode_ace_maxsz) #define decode_change_info_maxsz (5) #define decode_open_maxsz (op_decode_hdr_maxsz + \ @@ -415,6 +418,8 @@ static int nfs4_stat_to_errno(int); #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 +#define encode_layoutreturn_maxsz 0 +#define decode_layoutreturn_maxsz 0 #endif /* CONFIG_NFS_V4_1 */ #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ @@ -499,22 +504,24 @@ static int nfs4_stat_to_errno(int); (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_open_downgrade_maxsz + \ - encode_getattr_maxsz) + encode_layoutreturn_maxsz + \ + encode_open_downgrade_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_open_downgrade_maxsz + \ - decode_getattr_maxsz) + decode_layoutreturn_maxsz + \ + decode_open_downgrade_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_layoutreturn_maxsz + \ encode_close_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ + decode_layoutreturn_maxsz + \ decode_close_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ @@ -708,10 +715,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_layoutreturn_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutreturn_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ @@ -916,34 +926,22 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) { - __be32 *p; - - p = xdr_reserve_space(xdr, len); - xdr_encode_opaque_fixed(p, buf, len); + WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); } static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { - __be32 *p; - - p = reserve_space(xdr, 4 + len); - xdr_encode_opaque(p, str, len); + WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0); } static void encode_uint32(struct xdr_stream *xdr, u32 n) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(n); + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); } static void encode_uint64(struct xdr_stream *xdr, u64 n) { - __be32 *p; - - p = reserve_space(xdr, 8); - xdr_encode_hyper(p, n); + WARN_ON_ONCE(xdr_stream_encode_u64(xdr, n) < 0); } static void encode_nfs4_seqid(struct xdr_stream *xdr, @@ -1003,7 +1001,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs4_label *label, const struct nfs_server *server, - bool excl_check) + bool excl_check, const umode_t *umask) { char owner_name[IDMAP_NAMESZ]; char owner_group[IDMAP_NAMESZ]; @@ -1017,18 +1015,21 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, /* * We reserve enough space to write the entire attribute buffer at once. - * In the worst-case, this would be - * 16(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) - * = 40 bytes, plus any contribution from variable-length fields - * such as owner/group. */ if (iap->ia_valid & ATTR_SIZE) { bmval[0] |= FATTR4_WORD0_SIZE; len += 8; } + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + umask = NULL; if (iap->ia_valid & ATTR_MODE) { - bmval[1] |= FATTR4_WORD1_MODE; - len += 4; + if (umask) { + bmval[2] |= FATTR4_WORD2_MODE_UMASK; + len += 8; + } else { + bmval[1] |= FATTR4_WORD1_MODE; + len += 4; + } } if (iap->ia_valid & ATTR_UID) { owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); @@ -1129,6 +1130,10 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, *p++ = cpu_to_be32(label->len); p = xdr_encode_opaque_fixed(p, label->label, label->len); } + if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { + *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO); + *p++ = cpu_to_be32(*umask); + } /* out: */ } @@ -1183,7 +1188,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * } encode_string(xdr, create->name->len, create->name->name); - encode_attrs(xdr, create->attrs, create->label, create->server, false); + encode_attrs(xdr, create->attrs, create->label, create->server, false, + &create->umask); } static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) @@ -1403,11 +1409,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op switch(arg->createmode) { case NFS4_CREATE_UNCHECKED: *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, + &arg->umask); break; case NFS4_CREATE_GUARDED: *p = cpu_to_be32(NFS4_CREATE_GUARDED); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, + &arg->umask); break; case NFS4_CREATE_EXCLUSIVE: *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); @@ -1416,7 +1424,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op case NFS4_CREATE_EXCLUSIVE4_1: *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); encode_nfs4_verifier(xdr, &arg->u.verifier); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true, + &arg->umask); } } @@ -1672,7 +1681,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs { encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); encode_nfs4_stateid(xdr, &arg->stateid); - encode_attrs(xdr, arg->iap, arg->label, server, false); + encode_attrs(xdr, arg->iap, arg->label, server, false, NULL); } static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) @@ -2015,6 +2024,7 @@ encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args, struct compound_hdr *hdr) { + const struct pnfs_layoutdriver_type *lr_ops = NFS_SERVER(args->inode)->pnfs_curr_ld; __be32 *p; encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); @@ -2029,10 +2039,11 @@ encode_layoutreturn(struct xdr_stream *xdr, spin_lock(&args->inode->i_lock); encode_nfs4_stateid(xdr, &args->stateid); spin_unlock(&args->inode->i_lock); - if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { - NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( - NFS_I(args->inode)->layout, xdr, args); - } else + if (args->ld_private->ops && args->ld_private->ops->encode) + args->ld_private->ops->encode(xdr, args, args->ld_private); + else if (lr_ops->encode_layoutreturn) + lr_ops->encode_layoutreturn(xdr, args); + else encode_uint32(xdr, 0); } @@ -2062,6 +2073,13 @@ static void encode_free_stateid(struct xdr_stream *xdr, encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr); encode_nfs4_stateid(xdr, &args->stateid); } +#else +static inline void +encode_layoutreturn(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct compound_hdr *hdr) +{ +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2249,8 +2267,11 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); + if (args->bitmask != NULL) + encode_getfattr(xdr, args->bitmask, &hdr); encode_close(xdr, args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2327,8 +2348,9 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); encode_open_downgrade(xdr, args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2492,7 +2514,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, @@ -2671,7 +2693,10 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); encode_nops(&hdr); } @@ -3027,20 +3052,15 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) { - __be32 *p; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - *len = be32_to_cpup(p); - p = xdr_inline_decode(xdr, *len); - if (unlikely(!p)) - goto out_overflow; - *string = (char *)p; + ssize_t ret = xdr_stream_decode_opaque_inline(xdr, (void **)string, + NFS4_OPAQUE_LIMIT); + if (unlikely(ret < 0)) { + if (ret == -EBADMSG) + print_overflow_msg(__func__, xdr); + return -EIO; + } + *len = ret; return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) @@ -3107,7 +3127,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) } /* Dummy routine */ -static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) +static int decode_ace(struct xdr_stream *xdr, void *ace) { __be32 *p; unsigned int strlen; @@ -3855,45 +3875,50 @@ out_overflow: return -EIO; } +static ssize_t decode_nfs4_string(struct xdr_stream *xdr, + struct nfs4_string *name, gfp_t gfp_flags) +{ + ssize_t ret; + + ret = xdr_stream_decode_string_dup(xdr, &name->data, + XDR_MAX_NETOBJ, gfp_flags); + name->len = 0; + if (ret > 0) + name->len = ret; + return ret; +} + static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, const struct nfs_server *server, kuid_t *uid, struct nfs4_string *owner_name) { - uint32_t len; - __be32 *p; - int ret = 0; + ssize_t len; + char *p; *uid = make_kuid(&init_user_ns, -2); if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) return -EIO; - if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - len = be32_to_cpup(p); - p = xdr_inline_decode(xdr, len); - if (unlikely(!p)) - goto out_overflow; - if (owner_name != NULL) { - owner_name->data = kmemdup(p, len, GFP_NOWAIT); - if (owner_name->data != NULL) { - owner_name->len = len; - ret = NFS_ATTR_FATTR_OWNER_NAME; - } - } else if (len < XDR_MAX_NETOBJ) { - if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0) - ret = NFS_ATTR_FATTR_OWNER; - else - dprintk("%s: nfs_map_name_to_uid failed!\n", - __func__); - } else - dprintk("%s: name too long (%u)!\n", - __func__, len); - bitmap[1] &= ~FATTR4_WORD1_OWNER; + if (!(bitmap[1] & FATTR4_WORD1_OWNER)) + return 0; + bitmap[1] &= ~FATTR4_WORD1_OWNER; + + if (owner_name != NULL) { + len = decode_nfs4_string(xdr, owner_name, GFP_NOWAIT); + if (len <= 0) + goto out; + dprintk("%s: name=%s\n", __func__, owner_name->data); + return NFS_ATTR_FATTR_OWNER_NAME; + } else { + len = xdr_stream_decode_opaque_inline(xdr, (void **)&p, + XDR_MAX_NETOBJ); + if (len <= 0 || nfs_map_name_to_uid(server, p, len, uid) != 0) + goto out; + dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid)); + return NFS_ATTR_FATTR_OWNER; } - dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid)); - return ret; -out_overflow: +out: + if (len != -EBADMSG) + return 0; print_overflow_msg(__func__, xdr); return -EIO; } @@ -3902,41 +3927,33 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, const struct nfs_server *server, kgid_t *gid, struct nfs4_string *group_name) { - uint32_t len; - __be32 *p; - int ret = 0; + ssize_t len; + char *p; *gid = make_kgid(&init_user_ns, -2); if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) return -EIO; - if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - len = be32_to_cpup(p); - p = xdr_inline_decode(xdr, len); - if (unlikely(!p)) - goto out_overflow; - if (group_name != NULL) { - group_name->data = kmemdup(p, len, GFP_NOWAIT); - if (group_name->data != NULL) { - group_name->len = len; - ret = NFS_ATTR_FATTR_GROUP_NAME; - } - } else if (len < XDR_MAX_NETOBJ) { - if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0) - ret = NFS_ATTR_FATTR_GROUP; - else - dprintk("%s: nfs_map_group_to_gid failed!\n", - __func__); - } else - dprintk("%s: name too long (%u)!\n", - __func__, len); - bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; + if (!(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) + return 0; + bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; + + if (group_name != NULL) { + len = decode_nfs4_string(xdr, group_name, GFP_NOWAIT); + if (len <= 0) + goto out; + dprintk("%s: name=%s\n", __func__, group_name->data); + return NFS_ATTR_FATTR_GROUP_NAME; + } else { + len = xdr_stream_decode_opaque_inline(xdr, (void **)&p, + XDR_MAX_NETOBJ); + if (len <= 0 || nfs_map_group_to_gid(server, p, len, gid) != 0) + goto out; + dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid)); + return NFS_ATTR_FATTR_GROUP; } - dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid)); - return ret; -out_overflow: +out: + if (len != -EBADMSG) + return 0; print_overflow_msg(__func__, xdr); return -EIO; } @@ -4259,15 +4276,12 @@ out_overflow: static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) { - __be32 *p; - - p = xdr_inline_decode(xdr, len); - if (likely(p)) { - memcpy(buf, p, len); - return 0; + ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); + if (unlikely(ret < 0)) { + print_overflow_msg(__func__, xdr); + return -EIO; } - print_overflow_msg(__func__, xdr); - return -EIO; + return 0; } static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) @@ -5058,7 +5072,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr, if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; } - return decode_ace(xdr, NULL, res->server->nfs_client); + return decode_ace(xdr, NULL); out_overflow: print_overflow_msg(__func__, xdr); return -EIO; @@ -5625,8 +5639,6 @@ static int decode_exchange_id(struct xdr_stream *xdr, status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->server_owner->major_id, dummy_str, dummy); res->server_owner->major_id_sz = dummy; @@ -5634,8 +5646,6 @@ static int decode_exchange_id(struct xdr_stream *xdr, status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->server_scope->server_scope, dummy_str, dummy); res->server_scope->server_scope_sz = dummy; @@ -5650,16 +5660,12 @@ static int decode_exchange_id(struct xdr_stream *xdr, status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->impl_id->domain, dummy_str, dummy); /* nii_name */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->impl_id->name, dummy_str, dummy); /* nii_date */ @@ -6089,6 +6095,13 @@ static int decode_free_stateid(struct xdr_stream *xdr, res->status = decode_op_hdr(xdr, OP_FREE_STATEID); return res->status; } +#else +static inline +int decode_layoutreturn(struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + return 0; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6114,10 +6127,13 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status) goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } status = decode_open_downgrade(xdr, res); - if (status != 0) - goto out; - decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6444,16 +6460,18 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } + if (res->fattr != NULL) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_close(xdr, res); - if (status != 0) - goto out; - /* - * Note: Server may do delete on close for this file - * in which case the getattr call will fail with - * an ESTALE error. Shouldn't be a problem, - * though, since fattr->valid will remain unset. - */ - decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6920,9 +6938,17 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_getfattr(xdr, res->fattr, res->server); - if (status != 0) - goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } + if (res->fattr) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_delegreturn(xdr); out: return status; diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 919efd4a1a23..8f3d2acb81c3 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -291,7 +291,7 @@ objlayout_read_pagelist(struct nfs_pgio_header *hdr) &hdr->args.pgbase, hdr->args.offset, hdr->args.count); - dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", + dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n", __func__, inode->i_ino, offset, count, hdr->res.eof); err = objio_read_pagelist(hdr); @@ -504,10 +504,10 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) } void -objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, - struct xdr_stream *xdr, +objlayout_encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args) { + struct pnfs_layout_hdr *pnfslay = args->layout; struct objlayout *objlay = OBJLAYOUT(pnfslay); struct objlayout_io_res *oir, *tmp; __be32 *start; diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 2641dbad345c..fc94a5872ed4 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -175,7 +175,6 @@ extern void objlayout_encode_layoutcommit( const struct nfs4_layoutcommit_args *); extern void objlayout_encode_layoutreturn( - struct pnfs_layout_hdr *, struct xdr_stream *, const struct nfs4_layoutreturn_args *); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 965db474f4b0..6e629b856a00 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -867,8 +867,7 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) static bool nfs_match_lock_context(const struct nfs_lock_context *l1, const struct nfs_lock_context *l2) { - return l1->lockowner.l_owner == l2->lockowner.l_owner - && l1->lockowner.l_pid == l2->lockowner.l_pid; + return l1->lockowner == l2->lockowner; } /** diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 259ef85f435a..dd042498ce7c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -54,6 +54,12 @@ static DEFINE_SPINLOCK(pnfs_spinlock); static LIST_HEAD(pnfs_modules_tbl); static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); +static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, + struct list_head *free_me, + const struct pnfs_layout_range *range, + u32 seq); +static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -299,6 +305,49 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +static void +pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, + u32 seq) +{ + if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) + iomode = IOMODE_ANY; + lo->plh_return_iomode = iomode; + set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); + if (seq != 0) { + WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); + lo->plh_return_seq = seq; + } +} + +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + +static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +{ + clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); + smp_mb__after_atomic(); + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); +} + +static void +pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, + struct list_head *free_me) +{ + clear_bit(NFS_LSEG_ROC, &lseg->pls_flags); + clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + pnfs_lseg_dec_and_remove_zero(lseg, free_me); + if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + pnfs_lseg_dec_and_remove_zero(lseg, free_me); +} + /* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * @@ -315,9 +364,17 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, .offset = 0, .length = NFS4_MAX_UINT64, }; + struct pnfs_layout_segment *lseg, *next; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); + pnfs_clear_layoutreturn_info(lo); + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + pnfs_clear_lseg_state(lseg, lseg_list); + pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && + !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) + pnfs_clear_layoutreturn_waitbit(lo); + return !list_empty(&lo->plh_segs); } static int @@ -396,27 +453,42 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) { - struct inode *ino = lseg->pls_layout->plh_inode; - - NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + if (lseg != NULL) { + struct inode *inode = lseg->pls_layout->plh_inode; + NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg); + } } static void pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { - struct inode *inode = lo->plh_inode; - WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) { + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + return; + if (list_empty(&lo->plh_segs) && + !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && + !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { if (atomic_read(&lo->plh_outstanding) == 0) set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } - rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); +} + +static bool +pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) +{ + if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && + pnfs_layout_is_valid(lo)) { + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); + list_move_tail(&lseg->pls_list, &lo->plh_return_segs); + return true; + } + return false; } void @@ -442,6 +514,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); + if (pnfs_cache_lseg_for_layoutreturn(lo, lseg)) + lseg = NULL; spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); pnfs_put_layout_hdr(lo); @@ -482,22 +556,15 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) struct pnfs_layout_hdr *lo = lseg->pls_layout; if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) return; - pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); - pnfs_free_lseg_async(lseg); + if (!pnfs_cache_lseg_for_layoutreturn(lo, lseg)) { + pnfs_get_layout_hdr(lo); + pnfs_free_lseg_async(lseg); + } } } EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); -static u64 -end_offset(u64 start, u64 len) -{ - u64 end; - - end = start + len; - return end >= start ? end : NFS4_MAX_UINT64; -} - /* * is l2 fully contained in l1? * start1 end1 @@ -510,33 +577,13 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, const struct pnfs_layout_range *l2) { u64 start1 = l1->offset; - u64 end1 = end_offset(start1, l1->length); + u64 end1 = pnfs_end_offset(start1, l1->length); u64 start2 = l2->offset; - u64 end2 = end_offset(start2, l2->length); + u64 end2 = pnfs_end_offset(start2, l2->length); return (start1 <= start2) && (end1 >= end2); } -/* - * is l1 and l2 intersecting? - * start1 end1 - * [----------------------------------) - * start2 end2 - * [----------------) - */ -static bool -pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, - const struct pnfs_layout_range *l2) -{ - u64 start1 = l1->offset; - u64 end1 = end_offset(start1, l1->length); - u64 start2 = l2->offset; - u64 end2 = end_offset(start2, l2->length); - - return (end1 == NFS4_MAX_UINT64 || end1 > start2) && - (end2 == NFS4_MAX_UINT64 || end2 > start1); -} - static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) { @@ -637,6 +684,20 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return remaining; } +static void +pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, + struct list_head *free_me, + const struct pnfs_layout_range *range, + u32 seq) +{ + struct pnfs_layout_segment *lseg, *next; + + list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) { + if (pnfs_match_lseg_recall(lseg, range, seq)) + list_move_tail(&lseg->pls_list, free_me); + } +} + /* note free_me must contain lsegs from a single layout_hdr */ void pnfs_free_lseg_list(struct list_head *free_me) @@ -701,6 +762,8 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct inode *inode; list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + continue; inode = igrab(lo->plh_inode); if (inode == NULL) continue; @@ -816,14 +879,6 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -941,12 +996,31 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } -void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range, + const nfs4_stateid *stateid) { - clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); - smp_mb__after_atomic(); - wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); - rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); + struct inode *inode = lo->plh_inode; + LIST_HEAD(freeme); + + spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo) || !arg_stateid || + !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + goto out_unlock; + if (stateid) { + u32 seq = be32_to_cpu(arg_stateid->seqid); + + pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); + pnfs_free_returned_lsegs(lo, &freeme, range, seq); + pnfs_set_layout_stateid(lo, stateid, true); + } else + pnfs_mark_layout_stateid_invalid(lo, &freeme); +out_unlock: + pnfs_clear_layoutreturn_waitbit(lo); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); + } static bool @@ -957,8 +1031,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, /* Serialise LAYOUTGET/LAYOUTRETURN */ if (atomic_read(&lo->plh_outstanding) != 0) return false; - if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) return false; + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { if (stateid != NULL) { @@ -978,11 +1053,29 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, return true; } +static void +pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args, + struct pnfs_layout_hdr *lo, + const nfs4_stateid *stateid, + enum pnfs_iomode iomode) +{ + struct inode *inode = lo->plh_inode; + + args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id; + args->inode = inode; + args->range.iomode = iomode; + args->range.offset = 0; + args->range.length = NFS4_MAX_UINT64; + args->layout = lo; + nfs4_stateid_copy(&args->stateid, stateid); +} + static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, enum pnfs_iomode iomode, bool sync) { struct inode *ino = lo->plh_inode; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; struct nfs4_layoutreturn *lrp; int status = 0; @@ -996,15 +1089,12 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, goto out; } - nfs4_stateid_copy(&lrp->args.stateid, stateid); - lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; - lrp->args.inode = ino; - lrp->args.range.iomode = iomode; - lrp->args.range.offset = 0; - lrp->args.range.length = NFS4_MAX_UINT64; - lrp->args.layout = lo; + pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode); + lrp->args.ld_private = &lrp->ld_private; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; + if (ld->prepare_layoutreturn) + ld->prepare_layoutreturn(&lrp->args); status = nfs4_proc_layoutreturn(lrp, sync); out: @@ -1067,7 +1157,7 @@ _pnfs_return_layout(struct inode *ino) struct nfs_inode *nfsi = NFS_I(ino); LIST_HEAD(tmp_list); nfs4_stateid stateid; - int status = 0, empty; + int status = 0; bool send; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1081,7 +1171,14 @@ _pnfs_return_layout(struct inode *ino) } /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); - empty = list_empty(&lo->plh_segs); + /* Is there an outstanding layoutreturn ? */ + if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, + TASK_UNINTERRUPTIBLE)) + goto out_put_layout_hdr; + spin_lock(&ino->i_lock); + } pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); @@ -1095,7 +1192,7 @@ _pnfs_return_layout(struct inode *ino) } /* Don't send a LAYOUTRETURN if list was initially empty */ - if (empty) { + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout segments to return\n", __func__); goto out_put_layout_hdr; @@ -1103,10 +1200,10 @@ _pnfs_return_layout(struct inode *ino) send = pnfs_prepare_layoutreturn(lo, &stateid, NULL); spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&tmp_list); if (send) status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); out_put_layout_hdr: + pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); out: dprintk("<-- %s status: %d\n", __func__, status); @@ -1141,105 +1238,125 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } -bool pnfs_roc(struct inode *ino) +bool pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct rpc_cred *cred) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_open_context *ctx; struct nfs4_state *state; struct pnfs_layout_hdr *lo; - struct pnfs_layout_segment *lseg, *tmp; + struct pnfs_layout_segment *lseg, *next; nfs4_stateid stateid; - LIST_HEAD(tmp_list); - bool found = false, layoutreturn = false, roc = false; + enum pnfs_iomode iomode = 0; + bool layoutreturn = false, roc = false; + bool skip_read = false; + if (!nfs_have_layout(ino)) + return false; +retry: spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) + if (!lo || !pnfs_layout_is_valid(lo) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) goto out_noroc; + if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { + pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, + TASK_UNINTERRUPTIBLE); + pnfs_put_layout_hdr(lo); + goto retry; + } /* no roc if we hold a delegation */ - if (nfs4_check_delegation(ino, FMODE_READ)) - goto out_noroc; + if (nfs4_check_delegation(ino, FMODE_READ)) { + if (nfs4_check_delegation(ino, FMODE_WRITE)) + goto out_noroc; + skip_read = true; + } list_for_each_entry(ctx, &nfsi->open_files, list) { state = ctx->state; + if (state == NULL) + continue; /* Don't return layout if there is open file state */ - if (state != NULL && state->state != 0) + if (state->state & FMODE_WRITE) goto out_noroc; + if (state->state & FMODE_READ) + skip_read = true; } - /* always send layoutreturn if being marked so */ - if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) - layoutreturn = pnfs_prepare_layoutreturn(lo, - &stateid, NULL); - list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { + if (skip_read && lseg->pls_range.iomode == IOMODE_READ) + continue; /* If we are sending layoutreturn, invalidate all valid lsegs */ - if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { - mark_lseg_invalid(lseg, &tmp_list); - found = true; - } + if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags)) + continue; + /* + * Note: mark lseg for return so pnfs_layout_remove_lseg + * doesn't invalidate the layout for us. + */ + set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + if (!mark_lseg_invalid(lseg, &lo->plh_return_segs)) + continue; + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); + } + + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) + goto out_noroc; + /* ROC in two conditions: * 1. there are ROC lsegs * 2. we don't send layoutreturn */ - if (found && !layoutreturn) { - /* lo ref dropped in pnfs_roc_release() */ - pnfs_get_layout_hdr(lo); - roc = true; - } + /* lo ref dropped in pnfs_roc_release() */ + layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); + /* If the creds don't match, we can't compound the layoutreturn */ + if (!layoutreturn || cred != lo->plh_lc_cred) + goto out_noroc; + + roc = layoutreturn; + pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); + res->lrs_present = 0; + layoutreturn = false; out_noroc: spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&tmp_list); pnfs_layoutcommit_inode(ino, true); + if (roc) { + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; + if (ld->prepare_layoutreturn) + ld->prepare_layoutreturn(args); + return true; + } if (layoutreturn) - pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); - return roc; -} - -void pnfs_roc_release(struct inode *ino) -{ - struct pnfs_layout_hdr *lo; - - spin_lock(&ino->i_lock); - lo = NFS_I(ino)->layout; - pnfs_clear_layoutreturn_waitbit(lo); - if (atomic_dec_and_test(&lo->plh_refcount)) { - pnfs_detach_layout_hdr(lo); - spin_unlock(&ino->i_lock); - pnfs_free_layout_hdr(lo); - } else - spin_unlock(&ino->i_lock); -} - -void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) -{ - struct pnfs_layout_hdr *lo; - - spin_lock(&ino->i_lock); - lo = NFS_I(ino)->layout; - if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) - lo->plh_barrier = barrier; - spin_unlock(&ino->i_lock); - trace_nfs4_layoutreturn_on_close(ino, 0); + pnfs_send_layoutreturn(lo, &stateid, iomode, true); + return false; } -void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) +void pnfs_roc_release(struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + int ret) { - struct nfs_inode *nfsi = NFS_I(ino); - struct pnfs_layout_hdr *lo; - u32 current_seqid; - - spin_lock(&ino->i_lock); - lo = nfsi->layout; - current_seqid = be32_to_cpu(lo->plh_stateid.seqid); + struct pnfs_layout_hdr *lo = args->layout; + const nfs4_stateid *arg_stateid = NULL; + const nfs4_stateid *res_stateid = NULL; + struct nfs4_xdr_opaque_data *ld_private = args->ld_private; - /* Since close does not return a layout stateid for use as - * a barrier, we choose the worst-case barrier. - */ - *barrier = current_seqid + atomic_read(&lo->plh_outstanding); - spin_unlock(&ino->i_lock); + if (ret == 0) { + arg_stateid = &args->stateid; + if (res->lrs_present) + res_stateid = &res->stateid; + } + pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, + res_stateid); + if (ld_private && ld_private->ops && ld_private->ops->free) + ld_private->ops->free(ld_private); + pnfs_put_layout_hdr(lo); + trace_nfs4_layoutreturn_on_close(args->inode, 0); } bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) @@ -1252,13 +1369,11 @@ bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) * i_lock */ spin_lock(&ino->i_lock); lo = nfsi->layout; - if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); sleep = true; + } spin_unlock(&ino->i_lock); - - if (sleep) - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - return sleep; } @@ -1375,6 +1490,7 @@ alloc_init_layout_hdr(struct inode *ino, atomic_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); + INIT_LIST_HEAD(&lo->plh_return_segs); INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; lo->plh_lc_cred = get_rpccred(ctx->cred); @@ -1841,7 +1957,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } - if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { + if (!pnfs_layout_is_valid(lo)) { + /* We have a completely new layout */ + pnfs_set_layout_stateid(lo, &res->stateid, true); + } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to sequence\n", __func__); @@ -1851,12 +1970,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } else { /* * We got an entirely new state ID. Mark all segments for the - * inode invalid, and don't bother validating the stateid - * sequence number. + * inode invalid, and retry the layoutget */ pnfs_mark_layout_stateid_invalid(lo, &free_me); - - pnfs_set_layout_stateid(lo, &res->stateid, true); + goto out_forget; } pnfs_get_lseg(lseg); @@ -1877,20 +1994,6 @@ out_forget: return ERR_PTR(-EAGAIN); } -static void -pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, - u32 seq) -{ - if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) - iomode = IOMODE_ANY; - lo->plh_return_iomode = iomode; - set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); - if (seq != 0) { - WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); - lo->plh_return_seq = seq; - } -} - /** * pnfs_mark_matching_lsegs_return - Free or return matching layout segments * @lo: pointer to layout header @@ -1945,17 +2048,18 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, .offset = 0, .length = NFS4_MAX_UINT64, }; - LIST_HEAD(free_me); bool return_now = false; spin_lock(&inode->i_lock); pnfs_set_plh_return_info(lo, range.iomode, 0); + /* Block LAYOUTGET */ + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() * for how it works. */ - if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) { + if (!pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0)) { nfs4_stateid stateid; enum pnfs_iomode iomode; @@ -1967,7 +2071,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_unlock(&inode->i_lock); nfs_commit_inode(inode, 0); } - pnfs_free_lseg_list(&free_me); } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); @@ -2063,7 +2166,7 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, * */ if (pgio->pg_lseg) { - seg_end = end_offset(pgio->pg_lseg->pls_range.offset, + seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); WARN_ON_ONCE(req_start >= seg_end); @@ -2286,6 +2389,10 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) struct nfs_pageio_descriptor pgio; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + /* Prevent deadlocks with layoutreturn! */ + pnfs_put_lseg(hdr->lseg); + hdr->lseg = NULL; + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5c295512c967..590e1e35781f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -96,6 +96,7 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_RETURN, /* layoutreturn in progress */ + NFS_LAYOUT_RETURN_LOCK, /* Serialise layoutreturn */ NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ @@ -171,8 +172,8 @@ struct pnfs_layoutdriver_type { (struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_flags); - void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, - struct xdr_stream *xdr, + int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *); + void (*encode_layoutreturn) (struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args); void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); @@ -181,7 +182,6 @@ struct pnfs_layoutdriver_type { struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); - void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { @@ -190,6 +190,7 @@ struct pnfs_layout_hdr { struct list_head plh_layouts; /* other client layouts */ struct list_head plh_bulk_destroy; struct list_head plh_segs; /* layout segments list */ + struct list_head plh_return_segs; /* invalid layout segments */ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ unsigned long plh_retry_timestamp; unsigned long plh_flags; @@ -270,10 +271,13 @@ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, u32 seq); int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, struct list_head *lseg_list); -bool pnfs_roc(struct inode *ino); -void pnfs_roc_release(struct inode *ino); -void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); -void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier); +bool pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct rpc_cred *cred); +void pnfs_roc_release(struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + int ret); bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task); void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); @@ -292,7 +296,10 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode, bool strict_iomode, gfp_t gfp_flags); -void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); +void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range, + const nfs4_stateid *stateid); void pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, @@ -360,10 +367,9 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans, u32 version, u32 minor_version, - rpc_authflavor_t au_flavor); + unsigned int retrans, u32 version, u32 minor_version); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags); @@ -559,6 +565,38 @@ pnfs_copy_range(struct pnfs_layout_range *dst, memcpy(dst, src, sizeof(*dst)); } +static inline u64 +pnfs_end_offset(u64 start, u64 len) +{ + if (NFS4_MAX_UINT64 - start <= len) + return NFS4_MAX_UINT64; + return start + len; +} + +/* + * Are 2 ranges intersecting? + * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline bool +pnfs_is_range_intersecting(u64 start1, u64 end1, u64 start2, u64 end2) +{ + return (end1 == NFS4_MAX_UINT64 || start2 < end1) && + (end2 == NFS4_MAX_UINT64 || start1 < end2); +} + +static inline bool +pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, + const struct pnfs_layout_range *l2) +{ + u64 end1 = pnfs_end_offset(l1->offset, l1->length); + u64 end2 = pnfs_end_offset(l2->offset, l2->length); + + return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2); +} + extern unsigned int layoutstats_timer; #ifdef NFS_DEBUG @@ -630,23 +668,18 @@ pnfs_layoutcommit_outstanding(struct inode *inode) static inline bool -pnfs_roc(struct inode *ino) +pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct rpc_cred *cred) { return false; } static inline void -pnfs_roc_release(struct inode *ino) -{ -} - -static inline void -pnfs_roc_set_barrier(struct inode *ino, u32 barrier) -{ -} - -static inline void -pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) +pnfs_roc_release(struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + int ret) { } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 53b4705abcc7..7250b95549ec 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -600,8 +600,7 @@ static struct nfs_client *(*get_v3_ds_connect)( int ds_addrlen, int ds_proto, unsigned int ds_timeo, - unsigned int ds_retrans, - rpc_authflavor_t au_flavor); + unsigned int ds_retrans); static bool load_v3_ds_connect(void) { @@ -625,15 +624,13 @@ EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload); static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, unsigned int timeo, - unsigned int retrans, - rpc_authflavor_t au_flavor) + unsigned int retrans) { struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s DS %s au_flavor %d\n", __func__, - ds->ds_remotestr, au_flavor); + dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); if (!load_v3_ds_connect()) goto out; @@ -657,7 +654,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, clp = get_v3_ds_connect(mds_srv, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP, - timeo, retrans, au_flavor); + timeo, retrans); } if (IS_ERR(clp)) { @@ -676,15 +673,13 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, unsigned int timeo, unsigned int retrans, - u32 minor_version, - rpc_authflavor_t au_flavor) + u32 minor_version) { struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, - au_flavor); + dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); list_for_each_entry(da, &ds->ds_addrs, da_node) { dprintk("%s: DS %s: trying address %s\n", @@ -720,8 +715,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, clp = nfs4_set_ds_client(mds_srv, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP, - timeo, retrans, minor_version, - au_flavor); + timeo, retrans, minor_version); if (IS_ERR(clp)) continue; @@ -751,35 +745,52 @@ out: /* * Create an rpc connection to the nfs4_pnfs_ds data server. * Currently only supports IPv4 and IPv6 addresses. - * If connection fails, make devid unavailable. + * If connection fails, make devid unavailable and return a -errno. */ -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans, u32 version, - u32 minor_version, rpc_authflavor_t au_flavor) + unsigned int retrans, u32 version, u32 minor_version) { - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - int err = 0; + int err; +again: + err = 0; + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { if (version == 3) { err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, - retrans, au_flavor); + retrans); } else if (version == 4) { err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, - retrans, minor_version, - au_flavor); + retrans, minor_version); } else { dprintk("%s: unsupported DS version %d\n", __func__, version); err = -EPROTONOSUPPORT; } - if (err) - nfs4_mark_deviceid_unavailable(devid); nfs4_clear_ds_conn_bit(ds); } else { nfs4_wait_ds_connect(ds); + + /* what was waited on didn't connect AND didn't mark unavail */ + if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid)) + goto again; } + + /* + * At this point the ds->ds_clp should be ready, but it might have + * hit an error. + */ + if (!err) { + if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { + WARN_ON_ONCE(ds->ds_clp || + !nfs4_test_deviceid_unavailable(devid)); + return -EINVAL; + } + err = nfs_client_init_status(ds->ds_clp); + } + + return err; } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 001796bcd6c8..54e0f9f2dd94 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -55,7 +55,7 @@ #include <linux/nsproxy.h> #include <linux/rcupdate.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "nfs4_fs.h" #include "callback.h" @@ -531,39 +531,32 @@ static void nfs_show_mountd_netid(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { struct sockaddr *sap = (struct sockaddr *) &nfss->mountd_address; + char *proto = NULL; - seq_printf(m, ",mountproto="); switch (sap->sa_family) { case AF_INET: switch (nfss->mountd_protocol) { case IPPROTO_UDP: - seq_printf(m, RPCBIND_NETID_UDP); + proto = RPCBIND_NETID_UDP; break; case IPPROTO_TCP: - seq_printf(m, RPCBIND_NETID_TCP); + proto = RPCBIND_NETID_TCP; break; - default: - if (showdefaults) - seq_printf(m, "auto"); } break; case AF_INET6: switch (nfss->mountd_protocol) { case IPPROTO_UDP: - seq_printf(m, RPCBIND_NETID_UDP6); + proto = RPCBIND_NETID_UDP6; break; case IPPROTO_TCP: - seq_printf(m, RPCBIND_NETID_TCP6); + proto = RPCBIND_NETID_TCP6; break; - default: - if (showdefaults) - seq_printf(m, "auto"); } break; - default: - if (showdefaults) - seq_printf(m, "auto"); } + if (proto || showdefaults) + seq_printf(m, ",mountproto=%s", proto ?: "auto"); } static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, @@ -2904,7 +2897,7 @@ module_param(max_session_slots, ushort, 0644); MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " "requests the client will negotiate"); module_param(max_session_cb_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of parallel NFSv4.1 " +MODULE_PARM_DESC(max_session_cb_slots, "Maximum number of parallel NFSv4.1 " "callbacks the client will process for a given server"); module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 4fe3eead3868..5a1d0ded8979 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -77,7 +77,6 @@ static const char *nfs_get_link(struct dentry *dentry, * symlinks can't do much... */ const struct inode_operations nfs_symlink_inode_operations = { - .readlink = generic_readlink, .get_link = nfs_get_link, .getattr = nfs_getattr, .setattr = nfs_setattr, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53211838f72a..abb2c8a3be42 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -24,7 +24,7 @@ #include <linux/freezer.h> #include <linux/wait.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "delegation.h" #include "internal.h" @@ -728,8 +728,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { set_page_private(head->wb_page, 0); ClearPagePrivate(head->wb_page); - smp_mb__after_atomic(); - wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } nfsi->nrequests--; @@ -1151,8 +1149,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) if (l_ctx && flctx && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock))) { - do_flush |= l_ctx->lockowner.l_owner != current->files - || l_ctx->lockowner.l_pid != current->tgid; + do_flush |= l_ctx->lockowner != current->files; } nfs_release_request(req); if (!do_flush) @@ -1787,8 +1784,9 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { nfs_context_set_write_error(req->wb_context, status); - nfs_inode_remove_request(req); - dprintk(", error = %d\n", status); + if (req->wb_page) + nfs_inode_remove_request(req); + dprintk_cont(", error = %d\n", status); goto next; } @@ -1796,12 +1794,13 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) * returned by the server against all stored verfs. */ if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { /* We have a match */ - nfs_inode_remove_request(req); - dprintk(" OK\n"); + if (req->wb_page) + nfs_inode_remove_request(req); + dprintk_cont(" OK\n"); goto next; } /* We have a mismatch. Write the page again */ - dprintk(" mismatch\n"); + dprintk_cont(" mismatch\n"); nfs_mark_request_dirty(req); set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: |