From aeabb3c96186a0f944fc2b1f25c84d5eb3a93fa9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Nov 2018 20:11:45 -0500 Subject: NFSv4: Fix a NFSv4 state manager deadlock Fix a deadlock whereby the NFSv4 state manager can get stuck in the delegation return code, waiting for a layout return to complete in another thread. If the server reboots before that other thread completes, then we need to be able to start a second state manager thread in order to perform recovery. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4state.c | 16 +++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8d59c9655ec4..1b994b527518 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -41,6 +41,8 @@ enum nfs4_client_state { NFS4CLNT_MOVED, NFS4CLNT_LEASE_MOVED, NFS4CLNT_DELEGATION_EXPIRED, + NFS4CLNT_RUN_MANAGER, + NFS4CLNT_DELEGRETURN_RUNNING, }; #define NFS4_RENEW_TIMEOUT 0x01 diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ffea57885394..d8decf2ec48f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1210,6 +1210,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); @@ -2503,6 +2504,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Ensure exclusive access to NFSv4 state */ do { + clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { section = "purge state"; status = nfs4_purge_lease(clp); @@ -2593,14 +2595,18 @@ static void nfs4_state_manager(struct nfs_client *clp) } nfs4_end_drain_session(clp); - if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { - nfs_client_return_marked_delegations(clp); - continue; + nfs4_clear_state_manager_bit(clp); + + if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) { + if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { + nfs_client_return_marked_delegations(clp); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + } + clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state); } - nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ - if (clp->cl_state == 0) + if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; -- cgit v1.2.3 From 99f2c55591fb5c1b536263970d98c2ebc2089906 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 21 Nov 2018 11:24:22 -0500 Subject: NFSv4.2 copy do not allocate memory under the lock Bruce pointed out that we shouldn't allocate memory while holding a lock in the nfs4_callback_offload() and handle_async_copy() that deal with a racing CB_OFFLOAD and reply to COPY case. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 22 +++++++++++----------- fs/nfs/nfs42proc.c | 19 ++++++++++--------- 2 files changed, 21 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7b861bbc0b43..315967354954 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -686,20 +686,24 @@ __be32 nfs4_callback_offload(void *data, void *dummy, { struct cb_offloadargs *args = data; struct nfs_server *server; - struct nfs4_copy_state *copy; + struct nfs4_copy_state *copy, *tmp_copy; bool found = false; + copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); + if (!copy) + return htonl(NFS4ERR_SERVERFAULT); + spin_lock(&cps->clp->cl_lock); rcu_read_lock(); list_for_each_entry_rcu(server, &cps->clp->cl_superblocks, client_link) { - list_for_each_entry(copy, &server->ss_copies, copies) { + list_for_each_entry(tmp_copy, &server->ss_copies, copies) { if (memcmp(args->coa_stateid.other, - copy->stateid.other, + tmp_copy->stateid.other, sizeof(args->coa_stateid.other))) continue; - nfs4_copy_cb_args(copy, args); - complete(©->completion); + nfs4_copy_cb_args(tmp_copy, args); + complete(&tmp_copy->completion); found = true; goto out; } @@ -707,15 +711,11 @@ __be32 nfs4_callback_offload(void *data, void *dummy, out: rcu_read_unlock(); if (!found) { - copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); - if (!copy) { - spin_unlock(&cps->clp->cl_lock); - return htonl(NFS4ERR_SERVERFAULT); - } memcpy(©->stateid, &args->coa_stateid, NFS4_STATEID_SIZE); nfs4_copy_cb_args(copy, args); list_add_tail(©->copies, &cps->clp->pending_cb_stateids); - } + } else + kfree(copy); spin_unlock(&cps->clp->cl_lock); return 0; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ac5b784a1de0..fed06fd9998d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -137,31 +137,32 @@ static int handle_async_copy(struct nfs42_copy_res *res, struct file *dst, nfs4_stateid *src_stateid) { - struct nfs4_copy_state *copy; + struct nfs4_copy_state *copy, *tmp_copy; int status = NFS4_OK; bool found_pending = false; struct nfs_open_context *ctx = nfs_file_open_context(dst); + copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); + if (!copy) + return -ENOMEM; + spin_lock(&server->nfs_client->cl_lock); - list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids, + list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids, copies) { - if (memcmp(&res->write_res.stateid, ©->stateid, + if (memcmp(&res->write_res.stateid, &tmp_copy->stateid, NFS4_STATEID_SIZE)) continue; found_pending = true; - list_del(©->copies); + list_del(&tmp_copy->copies); break; } if (found_pending) { spin_unlock(&server->nfs_client->cl_lock); + kfree(copy); + copy = tmp_copy; goto out; } - copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); - if (!copy) { - spin_unlock(&server->nfs_client->cl_lock); - return -ENOMEM; - } memcpy(©->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE); init_completion(©->completion); copy->parent_state = ctx->state; -- cgit v1.2.3 From bb21ce0ad227b69ec0f83279297ee44232105d96 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Wed, 21 Nov 2018 12:25:41 +0100 Subject: flexfiles: use per-mirror specified stateid for IO rfc8435 says: For tight coupling, ffds_stateid provides the stateid to be used by the client to access the file. However current implementation replaces per-mirror provided stateid with by open or lock stateid. Ensure that per-mirror stateid is used by ff_layout_write_prepare_v4 and nfs4_ff_layout_prepare_ds. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Rick Macklem Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 21 +++++++++------------ fs/nfs/flexfilelayout/flexfilelayout.h | 4 ++++ fs/nfs/flexfilelayout/flexfilelayoutdev.c | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 86bcba40ca61..74b36ed883ca 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1361,12 +1361,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_read_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_READ) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_read_prepare_common(task, hdr); } static void ff_layout_read_call_done(struct rpc_task *task, void *data) @@ -1542,12 +1537,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_write_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_WRITE) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_write_prepare_common(task, hdr); } static void ff_layout_write_call_done(struct rpc_task *task, void *data) @@ -1742,6 +1732,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; + + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1804,6 +1798,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (fh) hdr->args.fh = fh; + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 411798346e48..de50a342d5a5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -215,6 +215,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, unsigned int maxnum); struct nfs_fh * nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid); struct nfs4_pnfs_ds * nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 74d8d5352438..d23347389626 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -370,6 +370,25 @@ out: return fh; } +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); + + if (!ff_layout_mirror_valid(lseg, mirror, false)) { + pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", + __func__, mirror_idx); + goto out; + } + + nfs4_stateid_copy(stateid, &mirror->stateid); + return 1; +out: + return 0; +} + /** * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call * @lseg: the layout segment we're operating on -- cgit v1.2.3