From 0654cc726fc6eed6dca915fb65ba7975716ea080 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Dec 2015 11:48:14 -0500 Subject: NFSv4.1/pNFS: Add a helper to mark the layout as returned This ensures that we don't reuse the stateid if a layout return or implied layout return means that we've returned all layout segments Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 807eb6ef4f91..716cbff24450 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -192,6 +192,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &args->cbl_range); } + pnfs_mark_layout_returned_if_empty(lo); unlock: spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); -- cgit v1.2.3 From fc7ff36747b991d1be0d68987066ea87eedbb43e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Dec 2015 10:28:59 -0500 Subject: pNFS: If we have to delay the layout callback, mark the layout for return If the client needs to delay the layout callback, then speed up the recall process by marking the remaining layout segments to be actively returned by the client. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 14 ++++++++++++-- fs/nfs/pnfs.c | 4 +++- fs/nfs/pnfs.h | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 716cbff24450..34852ece4057 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -181,9 +181,19 @@ static u32 initiate_file_draining(struct nfs_client *clp, pnfs_layoutcommit_inode(ino, false); spin_lock(&ino->i_lock); - if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || - pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, + /* + * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return) + */ + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + rv = NFS4ERR_DELAY; + goto unlock; + } + + if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &args->cbl_range)) { + pnfs_mark_matching_lsegs_return(lo, + &free_me_list, + &args->cbl_range); rv = NFS4ERR_DELAY; goto unlock; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b3fb6bb02275..360fe95c97b5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1756,7 +1756,7 @@ out_forget_reply: goto out; } -static void +void pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *return_range) @@ -1768,6 +1768,8 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, if (list_empty(&lo->plh_segs)) return; + assert_spin_locked(&lo->plh_inode->i_lock); + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (should_free_lseg(&lseg->pls_range, return_range)) { dprintk("%s: marking lseg %p iomode %d " diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index be24a759b655..d93c2ebc0fd3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -266,6 +266,9 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *recall_range); +void pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, + struct list_head *tmp_list, + struct pnfs_layout_range *recall_range); bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); -- cgit v1.2.3 From 41c9127d6d588138c66b2614ac596ea63313acca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Dec 2015 12:57:53 -0500 Subject: NFSv4.1/pNFS: Ensure we enforce RFC5661 Section 12.5.5.2.1 The RFC requires us to check if the server is recalling a stateid that we haven't yet received. If so, tell it to wait. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 34852ece4057..1b24ad07d4f5 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -160,6 +160,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, return lo; } +/* + * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing) + */ +static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo, + const nfs4_stateid *new) +{ + u32 oldseq, newseq; + + oldseq = be32_to_cpu(lo->plh_stateid.seqid); + newseq = be32_to_cpu(new->seqid); + + if (newseq > oldseq + 1) + return false; + return true; +} + static u32 initiate_file_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { @@ -175,6 +191,10 @@ static u32 initiate_file_draining(struct nfs_client *clp, ino = lo->plh_inode; spin_lock(&ino->i_lock); + if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) { + rv = NFS4ERR_DELAY; + goto unlock; + } pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); spin_unlock(&ino->i_lock); -- cgit v1.2.3 From e0d9243048fd18da695b8d3fe331176eac60866e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Dec 2015 12:21:50 -0500 Subject: NFSv4.1/pNFS: Don't return NFS4ERR_DELAY unnecessarily in CB_LAYOUTRECALL If the client is promising to return the layout ASAP, then there is no need to return DELAY and have the server retry. Instead default to the normal procedure described in RFC5661. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 1b24ad07d4f5..724a9b756ab0 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -214,7 +214,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, pnfs_mark_matching_lsegs_return(lo, &free_me_list, &args->cbl_range); - rv = NFS4ERR_DELAY; + rv = NFS4_OK; goto unlock; } -- cgit v1.2.3 From e07db907eb80525874b7707c62cc6f5e975ef130 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Dec 2015 14:07:23 -0500 Subject: NFSv4: List stateid information in the callback tracepoints The stateid is extremely valuable when debugging. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 16 +++++++++--- fs/nfs/nfs4trace.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 6 deletions(-) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 724a9b756ab0..e4dbab5dce6e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, res = htonl(NFS4ERR_BADHANDLE); inode = nfs_delegation_find_inode(cps->clp, &args->fh); - if (inode == NULL) + if (inode == NULL) { + trace_nfs4_cb_recall(cps->clp, &args->fh, NULL, + &args->stateid, -ntohl(res)); goto out; + } /* Set up a helper thread to actually return the delegation */ switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { case 0: @@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, default: res = htonl(NFS4ERR_RESOURCE); } - trace_nfs4_recall_delegation(inode, -ntohl(res)); + trace_nfs4_cb_recall(cps->clp, &args->fh, inode, + &args->stateid, -ntohl(res)); iput(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); @@ -185,8 +189,11 @@ static u32 initiate_file_draining(struct nfs_client *clp, LIST_HEAD(free_me_list); lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); - if (!lo) + if (!lo) { + trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL, + &args->cbl_stateid, -rv); goto out; + } ino = lo->plh_inode; @@ -227,7 +234,8 @@ unlock: spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); pnfs_put_layout_hdr(lo); - trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv); + trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, + &args->cbl_stateid, -rv); iput(ino); out: return rv; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index d08d0c84b778..88b6b14ce71b 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -982,7 +982,6 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl); DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ -DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, TP_PROTO( @@ -1145,8 +1144,74 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), \ TP_ARGS(clp, fhandle, inode, error)) DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr); -DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode); +DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, + TP_PROTO( + const struct nfs_client *clp, + const struct nfs_fh *fhandle, + const struct inode *inode, + const nfs4_stateid *stateid, + int error + ), + + TP_ARGS(clp, fhandle, inode, stateid, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __string(dstaddr, clp ? + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + __field(int, stateid_seq) + __field(u32, stateid_hash) + ), + + TP_fast_assign( + __entry->error = error; + __entry->fhandle = nfs_fhandle_hash(fhandle); + if (inode != NULL) { + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + } else { + __entry->fileid = 0; + __entry->dev = 0; + } + __assign_str(dstaddr, clp ? + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + __entry->stateid_seq = + be32_to_cpu(stateid->seqid); + __entry->stateid_hash = + nfs_stateid_hash(stateid); + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x dstaddr=%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + __get_str(dstaddr) + ) +); + +#define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \ + DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \ + TP_PROTO( \ + const struct nfs_client *clp, \ + const struct nfs_fh *fhandle, \ + const struct inode *inode, \ + const nfs4_stateid *stateid, \ + int error \ + ), \ + TP_ARGS(clp, fhandle, inode, stateid, error)) +DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall); +DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file); DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_PROTO( -- cgit v1.2.3 From b20135d0b2431900a3a5395970ffb7e4f3767c8b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Dec 2015 09:28:06 -0500 Subject: NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid If the layout segment is invalid, then we should not be adding more write requests to the commit list. Instead, those writes should be replayed after requesting a new layout. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 ++ fs/nfs/direct.c | 12 ++++++++++++ fs/nfs/pnfs.c | 3 +++ fs/nfs/pnfs.h | 6 ++++++ fs/nfs/pnfs_nfs.c | 5 +++++ fs/nfs/write.c | 9 +++++++++ include/linux/nfs_xdr.h | 2 ++ 7 files changed, 39 insertions(+) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e4dbab5dce6e..2be8b252e3b1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -233,6 +233,8 @@ static u32 initiate_file_draining(struct nfs_client *clp, unlock: spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(ino, 0); pnfs_put_layout_hdr(lo); trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, &args->cbl_stateid, -rv); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 14f77df79c25..a9a93927fe3e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -721,8 +721,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_direct_write_complete(dreq, data->inode); } +static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + struct nfs_direct_req *dreq = cinfo->dreq; + + spin_lock(&dreq->lock); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + nfs_mark_request_commit(req, NULL, cinfo, 0); +} + static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { .completion = nfs_direct_commit_complete, + .resched_write = nfs_direct_resched_write, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 360fe95c97b5..6593be7c0129 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -703,6 +703,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, ret = -EAGAIN; spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&lseg_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); iput(inode); } @@ -1811,6 +1813,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, pnfs_mark_matching_lsegs_return(lo, &free_me, &range); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&free_me); + nfs_commit_inode(inode, 0); } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d93c2ebc0fd3..4bd7faf9ce50 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -412,6 +412,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) return lseg; } +static inline bool +pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg) +{ + return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0; +} + /* Return true if a layout driver is being used for this mountpoint */ static inline int pnfs_enabled_sb(struct nfs_server *nfss) { diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 3c8e3a44e6ea..81ac6480f9e7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -868,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, buckets = cinfo->ds->buckets; list = &buckets[ds_commit_idx].written; if (list_empty(list)) { + if (!pnfs_is_valid_lseg(lseg)) { + spin_unlock(cinfo->lock); + cinfo->completion_ops->resched_write(cinfo, req); + return; + } /* Non-empty buckets hold a reference on the lseg. That ref * is normally transferred to the COMMIT call and released * there. It could also be released if the last req is pulled diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae29f082c9c2..0aa8d6f23b4c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1676,6 +1676,13 @@ void nfs_retry_commit(struct list_head *page_list, } EXPORT_SYMBOL_GPL(nfs_retry_commit); +static void +nfs_commit_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + __set_page_dirty_nobuffers(req->wb_page); +} + /* * Commit dirty pages */ @@ -1777,6 +1784,7 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, + .resched_write = nfs_commit_resched_write, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1823,6 +1831,7 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return res; } +EXPORT_SYMBOL_GPL(nfs_commit_inode); int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a8905b7d4d7f..bee3e60a7006 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1420,10 +1420,12 @@ struct nfs_mds_commit_info { struct list_head list; }; +struct nfs_commit_info; struct nfs_commit_data; struct nfs_inode; struct nfs_commit_completion_ops { void (*completion) (struct nfs_commit_data *data); + void (*resched_write) (struct nfs_commit_info *, struct nfs_page *); }; struct nfs_commit_info { -- cgit v1.2.3 From 4b0934baf9317e05c7568da1366a1d65f151d81f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2016 11:28:11 -0500 Subject: NFSv4.1/pNFS: Fix a race in initiate_file_draining() Peng Tao points out that the call to pnfs_mark_matching_lsegs_return() could race with pnfs_put_lseg(), in which case the layout segment is cleared, but no layoutreturn will be sent. Fix is to replace the call to pnfs_mark_matching_lsegs_invalid(). Reported-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2be8b252e3b1..f0939d097406 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -216,11 +216,8 @@ static u32 initiate_file_draining(struct nfs_client *clp, goto unlock; } - if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, + if (pnfs_mark_matching_lsegs_return(lo, &free_me_list, &args->cbl_range)) { - pnfs_mark_matching_lsegs_return(lo, - &free_me_list, - &args->cbl_range); rv = NFS4_OK; goto unlock; } -- cgit v1.2.3