From e20772cbdf463c12088837e5a08bde1b876bfd25 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Jul 2021 07:58:49 -0400 Subject: NFSv4/pNFS: Fix a layoutget livelock loop If NFS_LAYOUT_RETURN_REQUESTED is set, but there is no value set for the layout plh_return_seq, we can end up in a livelock loop in which every layout segment retrieved by a new call to layoutget is immediately invalidated by pnfs_layout_need_return(). To get around this, we should just set plh_return_seq to the current value of the layout stateid's seqid. Fixes: d474f96104bd ("NFS: Don't return layout segments that are in use") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ef14ea0b6ab8..da5cacad6979 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -347,11 +347,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, iomode = IOMODE_ANY; lo->plh_return_iomode = iomode; set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); - if (seq != 0) { - WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); + /* + * We must set lo->plh_return_seq to avoid livelocks with + * pnfs_layout_need_return() + */ + if (seq == 0) + seq = be32_to_cpu(lo->plh_stateid.seqid); + if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) lo->plh_return_seq = seq; - pnfs_barrier_update(lo, seq); - } + pnfs_barrier_update(lo, seq); } static void -- cgit v1.2.3 From 7c0bbf2d3dcd13c906244e0c2e4a0ba3a33dab6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Jul 2021 07:59:20 -0400 Subject: NFSv4/pNFS: Remove dead code Since commit 2b28a7bee453 ("fs, nfs: convert pnfs_layout_hdr.plh_refcount from atomic_t to refcount_t") it has not been legal to bump a zero refcount, so the code that tries to allow it if the NFS_LSEG_VALID flag is still set would cause trouble. Luckily, NFS_LSEG_VALID has its own refcount so we can never hit this bad code snippet in practice. Remove it to avoid confusion. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index da5cacad6979..856706180d33 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -596,10 +596,6 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) inode = lo->plh_inode; if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { - if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { - spin_unlock(&inode->i_lock); - return; - } pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); if (pnfs_cache_lseg_for_layoutreturn(lo, lseg)) -- cgit v1.2.3 From 45baadaad7bf9183651fb74f4ed1200da48505a5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jul 2021 08:57:20 -0400 Subject: NFSv4/pNFS: Always allow update of a zero valued layout barrier A zero value for the layout barrier indicates that it has been cleared (since seqid '0' is an illegal value), so we should always allow it to be updated. Fixes: d29b468da4f9 ("pNFS/NFSv4: Improve rejection of out-of-order layouts") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 856706180d33..7775f6b5a53a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -335,7 +335,7 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq) { - if (pnfs_seqid_is_newer(newseq, lo->plh_barrier)) + if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier) lo->plh_barrier = newseq; } -- cgit v1.2.3 From d6236a98b3bab07c0a1455fd1ab46f79c3978cdc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jul 2021 08:57:21 -0400 Subject: NFSv4/pnfs: The layout barrier indicate a minimal value for the seqid The intention of the layout barrier is to ensure that we do not update the layout to match an older value than the current expectation. Fix the test in pnfs_layout_stateid_blocked() to reflect that it is legal for the seqid of the stateid to match that of the barrier. Fixes: aa95edf309ef ("NFSv4/pnfs: Fix the layout barrier update") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7775f6b5a53a..7c9090a28e5c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1000,7 +1000,7 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, { u32 seqid = be32_to_cpu(stateid->seqid); - return !pnfs_seqid_is_newer(seqid, lo->plh_barrier) && lo->plh_barrier; + return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid); } /* lget is set to 1 if called from inside send_layoutget call chain */ -- cgit v1.2.3 From 5c2465dfd457f3015eebcc3ace50570e1d896aeb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Jul 2021 15:52:12 -0400 Subject: SUNRPC: Set rq_auth_stat in the pg_authenticate() callout In a few moments, rq_auth_stat will need to be explicitly set to rpc_auth_ok before execution gets to the dispatcher. svc_authenticate() already sets it, but it often gets reset to rpc_autherr_badcred right after that call, even when authentication is successful. Let's ensure that the pg_authenticate callout and svc_set_client() set it properly in every case. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/lockd/svc.c | 2 ++ fs/nfs/callback.c | 4 ++++ net/sunrpc/auth_gss/svcauth_gss.c | 4 ++++ net/sunrpc/svc.c | 4 +--- net/sunrpc/svcauth_unix.c | 6 +++++- 5 files changed, 16 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 2de048f80eb8..8e936999216c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -649,6 +649,7 @@ static int lockd_authenticate(struct svc_rqst *rqstp) switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: case RPC_AUTH_UNIX: + rqstp->rq_auth_stat = rpc_auth_ok; if (rqstp->rq_proc == 0) return SVC_OK; if (is_callback(rqstp->rq_proc)) { @@ -659,6 +660,7 @@ static int lockd_authenticate(struct svc_rqst *rqstp) } return svc_set_client(rqstp); } + rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 7817ad94a6ba..86d856de1389 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -429,6 +429,8 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) */ static int nfs_callback_authenticate(struct svc_rqst *rqstp) { + rqstp->rq_auth_stat = rpc_autherr_badcred; + switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) @@ -439,6 +441,8 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) if (svc_is_backchannel(rqstp)) return SVC_DENIED; } + + rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 635449ed7af6..f89075070fb0 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1038,6 +1038,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &svcdata->clcred; int stat; + rqstp->rq_auth_stat = rpc_autherr_badcred; + /* * A gss export can be specified either by: * export *(sec=krb5,rw) @@ -1053,6 +1055,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) stat = svcauth_unix_set_client(rqstp); if (stat == SVC_DROP || stat == SVC_CLOSE) return stat; + + rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 360dab62b6b4..2019d1203641 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1328,10 +1328,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) */ auth_res = svc_authenticate(rqstp); /* Also give the program a chance to reject this call: */ - if (auth_res == SVC_OK && progp) { - rqstp->rq_auth_stat = rpc_autherr_badcred; + if (auth_res == SVC_OK && progp) auth_res = progp->pg_authenticate(rqstp); - } if (auth_res != SVC_OK) trace_svc_authenticate(rqstp, auth_res); switch (auth_res) { diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index eacfebf326dd..d7ed7d49115a 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -681,8 +681,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) - return SVC_OK; + goto out; + rqstp->rq_auth_stat = rpc_autherr_badcred; ipm = ip_map_cached_get(xprt); if (ipm == NULL) ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class, @@ -719,6 +720,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) put_group_info(cred->cr_group_info); cred->cr_group_info = gi; } + +out: + rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } -- cgit v1.2.3 From 9082e1d914f8b27114352b1940bbcc7522f682e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Jul 2021 15:52:19 -0400 Subject: SUNRPC: Eliminate the RQ_AUTHERR flag Now that there is an alternate method for returning an auth_stat value, replace the RQ_AUTHERR flag with use of that new method. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 3 ++- include/linux/sunrpc/svc.h | 2 -- include/trace/events/sunrpc.h | 3 +-- net/sunrpc/svc.c | 24 ++++-------------------- 4 files changed, 7 insertions(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c5348ba81129..7ff99155b023 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -988,7 +988,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) out_invalidcred: pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n"); - return svc_return_autherr(rqstp, rpc_autherr_badcred); + rqstp->rq_auth_stat = rpc_autherr_badcred; + return rpc_success; } /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 35f12963e1ff..63c9210cae06 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -275,7 +275,6 @@ struct svc_rqst { #define RQ_VICTIM (5) /* about to be shut down */ #define RQ_BUSY (6) /* request is busy */ #define RQ_DATA (7) /* request has data */ -#define RQ_AUTHERR (8) /* Request status is auth error */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ @@ -533,7 +532,6 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); -__be32 svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err); __be32 svc_generic_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, struct svc_process_info *procinfo); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index c7d9e6c7a979..169b93e4dbc1 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1539,8 +1539,7 @@ DEFINE_SVCXDRBUF_EVENT(sendto); svc_rqst_flag(SPLICE_OK) \ svc_rqst_flag(VICTIM) \ svc_rqst_flag(BUSY) \ - svc_rqst_flag(DATA) \ - svc_rqst_flag_end(AUTHERR) + svc_rqst_flag_end(DATA) #undef svc_rqst_flag #undef svc_rqst_flag_end diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2019d1203641..95836bf514b5 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1163,22 +1163,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif -__be32 -svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err) -{ - set_bit(RQ_AUTHERR, &rqstp->rq_flags); - return auth_err; -} -EXPORT_SYMBOL_GPL(svc_return_autherr); - -static __be32 -svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp) -{ - if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags)) - return *statp; - return rpc_auth_ok; -} - static int svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) { @@ -1202,7 +1186,7 @@ svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) test_bit(RQ_DROPME, &rqstp->rq_flags)) return 0; - if (test_bit(RQ_AUTHERR, &rqstp->rq_flags)) + if (rqstp->rq_auth_stat != rpc_auth_ok) return 1; if (*statp != rpc_success) @@ -1390,15 +1374,15 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto release_dropit; if (*statp == rpc_garbage_args) goto err_garbage; - rqstp->rq_auth_stat = svc_get_autherr(rqstp, statp); - if (rqstp->rq_auth_stat != rpc_auth_ok) - goto err_release_bad_auth; } else { dprintk("svc: calling dispatcher\n"); if (!process.dispatch(rqstp, statp)) goto release_dropit; /* Release reply info */ } + if (rqstp->rq_auth_stat != rpc_auth_ok) + goto err_release_bad_auth; + /* Check RPC status result */ if (*statp != rpc_success) resv->iov_len = ((void*)statp) - resv->iov_base + 4; -- cgit v1.2.3 From 7d34c96217cf3c2d37ca0a56ca0bc3c3bef1e189 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Jul 2021 15:52:25 -0400 Subject: NFS: Add a private local dispatcher for NFSv4 callback operations The client's NFSv4 callback service is the only remaining user of svc_generic_dispatch(). Note that the NFSv4 callback service doesn't use the .pc_encode and .pc_decode callouts in any substantial way, so they are removed. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 7ff99155b023..e30374e363a6 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -992,6 +992,15 @@ out_invalidcred: return rpc_success; } +static int +nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp) +{ + const struct svc_procedure *procp = rqstp->rq_procinfo; + + *statp = procp->pc_func(rqstp); + return 1; +} + /* * Define NFS4 callback COMPOUND ops. */ @@ -1080,7 +1089,7 @@ const struct svc_version nfs4_callback_version1 = { .vs_proc = nfs4_callback_procedures1, .vs_count = nfs4_callback_count1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, - .vs_dispatch = NULL, + .vs_dispatch = nfs_callback_dispatch, .vs_hidden = true, .vs_need_cong_ctrl = true, }; @@ -1092,7 +1101,7 @@ const struct svc_version nfs4_callback_version4 = { .vs_proc = nfs4_callback_procedures1, .vs_count = nfs4_callback_count4, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, - .vs_dispatch = NULL, + .vs_dispatch = nfs_callback_dispatch, .vs_hidden = true, .vs_need_cong_ctrl = true, }; -- cgit v1.2.3 From c35a810ce59524971c4a3b45faed4d0121e5a305 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Jul 2021 15:52:31 -0400 Subject: NFS: Remove unused callback void decoder Clean up: The callback RPC dispatcher no longer invokes these call outs, although svc_process_common() relies on seeing a .pc_encode function. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e30374e363a6..c1d08ab1fe22 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -63,11 +63,10 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp) return htonl(NFS4_OK); } -static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_argsize_check(rqstp, p); -} - +/* + * svc_process_common() looks for an XDR encoder to know when + * not to drop a Reply. + */ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p) { return xdr_ressize_check(rqstp, p); @@ -1067,7 +1066,6 @@ static struct callback_op callback_ops[] = { static const struct svc_procedure nfs4_callback_procedures1[] = { [CB_NULL] = { .pc_func = nfs4_callback_null, - .pc_decode = nfs4_decode_void, .pc_encode = nfs4_encode_void, .pc_xdrressize = 1, .pc_name = "NULL", -- cgit v1.2.3 From 89ef17b6636f2ae3e4e4041f53be7c0118b4b6c1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Jul 2021 15:52:37 -0400 Subject: NFS: Extract the xdr_init_encode/decode() calls from decode_compound Clean up: Move the xdr_init_encode() and xdr_init_decode() calls into the dispatcher, just like the NFSD and lockd dispatchers. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c1d08ab1fe22..bf0efec93da8 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -925,22 +925,15 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) { struct cb_compound_hdr_arg hdr_arg = { 0 }; struct cb_compound_hdr_res hdr_res = { NULL }; - struct xdr_stream xdr_in, xdr_out; - __be32 *p, status; struct cb_process_state cps = { .drc_status = 0, .clp = NULL, .net = SVC_NET(rqstp), }; unsigned int nops = 0; + __be32 status; - xdr_init_decode(&xdr_in, &rqstp->rq_arg, - rqstp->rq_arg.head[0].iov_base, NULL); - - p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL); - - status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); + status = decode_compound_hdr_arg(&rqstp->rq_arg_stream, &hdr_arg); if (status == htonl(NFS4ERR_RESOURCE)) return rpc_garbage_args; @@ -960,15 +953,15 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) cps.minorversion = hdr_arg.minorversion; hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; - if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) { + if (encode_compound_hdr_res(&rqstp->rq_res_stream, &hdr_res) != 0) { if (cps.clp) nfs_put_client(cps.clp); return rpc_system_err; } while (status == 0 && nops != hdr_arg.nops) { - status = process_op(nops, rqstp, &xdr_in, - rqstp->rq_argp, &xdr_out, rqstp->rq_resp, - &cps); + status = process_op(nops, rqstp, &rqstp->rq_arg_stream, + rqstp->rq_argp, &rqstp->rq_res_stream, + rqstp->rq_resp, &cps); nops++; } @@ -996,6 +989,9 @@ nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *procp = rqstp->rq_procinfo; + svcxdr_init_decode(rqstp); + svcxdr_init_encode(rqstp); + *statp = procp->pc_func(rqstp); return 1; } -- cgit v1.2.3 From 9eff97abef057c02a13bb3aa0e4821cd60fd80df Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Jul 2021 15:52:43 -0400 Subject: NFS: Clean up the synopsis of callback process_op() The xdr_stream and rq_arg and rq_res are already accessible via the @rqstp parameter. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index bf0efec93da8..4c48d85f6517 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -863,17 +863,16 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) } static __be32 process_op(int nop, struct svc_rqst *rqstp, - struct xdr_stream *xdr_in, void *argp, - struct xdr_stream *xdr_out, void *resp, - struct cb_process_state *cps) + struct cb_process_state *cps) { + struct xdr_stream *xdr_out = &rqstp->rq_res_stream; struct callback_op *op = &callback_ops[0]; unsigned int op_nr; __be32 status; long maxlen; __be32 res; - status = decode_op_hdr(xdr_in, &op_nr); + status = decode_op_hdr(&rqstp->rq_arg_stream, &op_nr); if (unlikely(status)) return status; @@ -903,9 +902,11 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp, maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { - status = op->decode_args(rqstp, xdr_in, argp); + status = op->decode_args(rqstp, &rqstp->rq_arg_stream, + rqstp->rq_argp); if (likely(status == 0)) - status = op->process_op(argp, resp, cps); + status = op->process_op(rqstp->rq_argp, rqstp->rq_resp, + cps); } else status = htonl(NFS4ERR_RESOURCE); @@ -914,7 +915,7 @@ encode_hdr: if (unlikely(res)) return res; if (op->encode_res != NULL && status == 0) - status = op->encode_res(rqstp, xdr_out, resp); + status = op->encode_res(rqstp, xdr_out, rqstp->rq_resp); return status; } @@ -959,9 +960,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) return rpc_system_err; } while (status == 0 && nops != hdr_arg.nops) { - status = process_op(nops, rqstp, &rqstp->rq_arg_stream, - rqstp->rq_argp, &rqstp->rq_res_stream, - rqstp->rq_resp, &cps); + status = process_op(nops, rqstp, &cps); nops++; } -- cgit v1.2.3 From ca7d1d1a0b975d3d8aaaeab008a07bb3d3c5ec7e Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Fri, 21 May 2021 15:09:38 -0400 Subject: NFSv4.2: remove restriction of copy size for inter-server copy. Currently inter-server copy is allowed only if the copy size is larger than (rsize*14) which is the over-head of the mount operation of the source export. This patch, relying on the delayed unmount feature, removes this restriction since the mount and unmount overhead is now not applicable for every inter-server copy. Signed-off-by: Dai Ngo Signed-off-by: Anna Schumaker --- fs/nfs/nfs4file.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c820de58a661..c91565227ea2 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -158,13 +158,11 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, sync = true; retry: if (!nfs42_files_from_same_server(file_in, file_out)) { - /* for inter copy, if copy size if smaller than 12 RPC - * payloads, fallback to traditional copy. There are - * 14 RPCs during an NFSv4.x mount between source/dest - * servers. + /* + * for inter copy, if copy size is too small + * then fallback to generic copy. */ - if (sync || - count <= 14 * NFS_SERVER(file_inode(file_in))->rsize) + if (sync) return -EOPNOTSUPP; cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res), GFP_NOFS); -- cgit v1.2.3 From 79d534f8cbf9714e2ba735f5b6c97678d266b2de Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 18 Aug 2021 10:02:52 +0800 Subject: NFSv3: Delete duplicate judgement in nfs3_async_handle_jukebox As eb96d5c97b08 ("SUNRPC handle EKEYEXPIRED in call_refreshresult") commit handle EKEYEXPIRED in call_refreshresult, so there is only handle when "task->tk_status" is equal "-EJUKEBOX" in nfs3_async_handle_jukebox. Signed-off-by: Ye Bin Signed-off-by: Anna Schumaker --- fs/nfs/nfs3proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2299446b3b89..f7524310ddf4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -49,8 +49,7 @@ nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { if (task->tk_status != -EJUKEBOX) return 0; - if (task->tk_status == -EJUKEBOX) - nfs_inc_stats(inode, NFSIOS_DELAY); + nfs_inc_stats(inode, NFSIOS_DELAY); task->tk_status = 0; rpc_restart_call(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); -- cgit v1.2.3 From 7e134205f62955369619021a695cd78fefd32451 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 27 Aug 2021 14:37:17 -0400 Subject: NFSv4 introduce max_connect mount options This option will control up to how many xprts can the client establish to the server with a distinct address (that means nconnect connections are not counted towards this new limit). This patch is setting up nfs structures to keeep track of the max_connect limit (does not enforce it). The default value is kept at 1 so that no current mounts that don't want any additional connections would be effected. The maximum value is set at 16. Mounts to DS are not limited to default value of 1 but instead set to the maximum default value of 16 (NFS_MAX_TRANSPORTS). Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 1 + fs/nfs/fs_context.c | 7 +++++++ fs/nfs/internal.h | 2 ++ fs/nfs/nfs4client.c | 12 ++++++++++-- fs/nfs/super.c | 2 ++ include/linux/nfs_fs.h | 5 +++++ include/linux/nfs_fs_sb.h | 1 + 7 files changed, 28 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 330f65727c45..486dec59972b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_proto = cl_init->proto; clp->cl_nconnect = cl_init->nconnect; + clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1; clp->cl_net = get_net(cl_init->net); clp->cl_principal = "*"; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index d95c9a39bc70..0d444a90f513 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -60,6 +60,7 @@ enum nfs_param { Opt_mountvers, Opt_namelen, Opt_nconnect, + Opt_max_connect, Opt_port, Opt_posix, Opt_proto, @@ -158,6 +159,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_u32 ("mountvers", Opt_mountvers), fsparam_u32 ("namlen", Opt_namelen), fsparam_u32 ("nconnect", Opt_nconnect), + fsparam_u32 ("max_connect", Opt_max_connect), fsparam_string("nfsvers", Opt_vers), fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), @@ -770,6 +772,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, goto out_of_bounds; ctx->nfs_server.nconnect = result.uint_32; break; + case Opt_max_connect: + if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS) + goto out_of_bounds; + ctx->nfs_server.max_connect = result.uint_32; + break; case Opt_lookupcache: switch (result.uint_32) { case Opt_lookupcache_all: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a36af04188c2..66fc936834f2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -67,6 +67,7 @@ struct nfs_client_initdata { int proto; u32 minorversion; unsigned int nconnect; + unsigned int max_connect; struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; @@ -121,6 +122,7 @@ struct nfs_fs_context { int port; unsigned short protocol; unsigned short nconnect; + unsigned short max_connect; unsigned short export_path_len; } nfs_server; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 28431acd1230..270caa1805a2 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -865,6 +865,7 @@ static int nfs4_set_client(struct nfs_server *server, const char *ip_addr, int proto, const struct rpc_timeout *timeparms, u32 minorversion, unsigned int nconnect, + unsigned int max_connect, struct net *net) { struct nfs_client_initdata cl_init = { @@ -883,6 +884,8 @@ static int nfs4_set_client(struct nfs_server *server, if (minorversion == 0) __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); + else + cl_init.max_connect = max_connect; if (proto == XPRT_TRANSPORT_TCP) cl_init.nconnect = nconnect; @@ -952,8 +955,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; - if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) + if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) { cl_init.nconnect = mds_clp->cl_nconnect; + cl_init.max_connect = NFS_MAX_TRANSPORTS; + } if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); @@ -1122,6 +1127,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) &timeparms, ctx->minorversion, ctx->nfs_server.nconnect, + ctx->nfs_server.max_connect, fc->net_ns); if (error < 0) return error; @@ -1211,6 +1217,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, + parent_client->cl_max_connect, parent_client->cl_net); if (!error) goto init_server; @@ -1226,6 +1233,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, + parent_client->cl_max_connect, parent_client->cl_net); if (error < 0) goto error; @@ -1323,7 +1331,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_proto, clnt->cl_timeout, clp->cl_minorversion, - clp->cl_nconnect, net); + clp->cl_nconnect, clp->cl_max_connect, net); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index fe58525cfed4..e65c83494c05 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -480,6 +480,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (clp->cl_nconnect > 0) seq_printf(m, ",nconnect=%u", clp->cl_nconnect); if (version == 4) { + if (clp->cl_max_connect > 1) + seq_printf(m, ",max_connect=%u", clp->cl_max_connect); if (nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); } else diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ce6474594872..b9a8b925db43 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -40,6 +40,11 @@ #include +/* + * These are the default for number of transports to different server IPs + */ +#define NFS_MAX_TRANSPORTS 16 + /* * These are the default flags for swap requests */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d71a0e90faeb..2a9acbfe00f0 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -62,6 +62,7 @@ struct nfs_client { u32 cl_minorversion;/* NFSv4 minorversion */ unsigned int cl_nconnect; /* Number of connections */ + unsigned int cl_max_connect; /* max number of xprts allowed */ const char * cl_principal; /* used for machine cred */ #if IS_ENABLED(CONFIG_NFS_V4) -- cgit v1.2.3 From dc48e0abee245e2f0361bd8d4e3b00f70450fab2 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 27 Aug 2021 14:37:18 -0400 Subject: SUNRPC enforce creation of no more than max_connect xprts If we are adding new transports via rpc_clnt_test_and_add_xprt() then check if we've reached the limit. Currently only pnfs path adds transports via that function but this is done in preparation when the client would add new transports when session trunking is detected. A warning is logged if the limit is reached. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 1 + include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 9 +++++++++ 3 files changed, 12 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 486dec59972b..23e165d5ec9c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -541,6 +541,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, clnt->cl_principal = clp->cl_principal; clp->cl_rpcclient = clnt; + clnt->cl_max_connect = clp->cl_max_connect; return 0; } EXPORT_SYMBOL_GPL(nfs_create_rpc_client); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b2edd5fc2f0c..a4661646adc9 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -82,6 +82,7 @@ struct rpc_clnt { struct work_struct cl_work; }; const struct cred *cl_cred; + unsigned int cl_max_connect; /* max number of transports not to the same IP */ }; /* @@ -136,6 +137,7 @@ struct rpc_create_args { char *client_name; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ const struct cred *cred; + unsigned int max_connect; }; struct rpc_add_xprt_test { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 451ac7d031db..f056ff931444 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2787,6 +2787,15 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, struct rpc_cb_add_xprt_calldata *data; struct rpc_task *task; + if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) { + rcu_read_lock(); + pr_warn("SUNRPC: reached max allowed number (%d) did not add " + "transport to server: %s\n", clnt->cl_max_connect, + rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + return -EINVAL; + } + data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; -- cgit v1.2.3 From 2a7a451a9084877a0b9d335c77d57e4cda1e5882 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 27 Aug 2021 14:37:19 -0400 Subject: NFSv4.1 add network transport when session trunking is detected After trunking is discovered in nfs4_discover_server_trunking(), add the transport to the old client structure if the allowed limit of transports has not been reached. An example: there exists a multi-homed server and client mounts one server address and some volume and then doest another mount to a different address of the same server and perhaps a different volume. Previously, the client checks that this is a session trunkable servers (same server), and removes the newly created client structure along with its transport. Now, the client adds the connection from the 2nd mount into the xprt switch of the existing client (it leads to having 2 available connections). Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 270caa1805a2..af57332503be 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -402,6 +402,33 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) return nfs4_init_callback(clp); } +static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) +{ + struct sockaddr_storage clp_addr, old_addr; + struct sockaddr *clp_sap = (struct sockaddr *)&clp_addr; + struct sockaddr *old_sap = (struct sockaddr *)&old_addr; + size_t clp_salen; + struct xprt_create xprt_args = { + .ident = old->cl_proto, + .net = old->cl_net, + .servername = old->cl_hostname, + }; + + if (clp->cl_proto != old->cl_proto) + return; + clp_salen = rpc_peeraddr(clp->cl_rpcclient, clp_sap, sizeof(clp_addr)); + rpc_peeraddr(old->cl_rpcclient, old_sap, sizeof(old_addr)); + + if (clp_addr.ss_family != old_addr.ss_family) + return; + + xprt_args.dstaddr = clp_sap; + xprt_args.addrlen = clp_salen; + + rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args, + rpc_clnt_test_and_add_xprt, NULL); +} + /** * nfs4_init_client - Initialise an NFS4 client record * @@ -436,6 +463,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, * won't try to use it. */ nfs_mark_client_ready(clp, -EPERM); + if (old->cl_mvops->session_trunk) + nfs4_add_trunk(clp, old); } clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags); nfs_put_client(clp); -- cgit v1.2.3 From 8cfb9015280d49f9d92d5b0f88cedf5f0856c0fd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Aug 2021 14:00:56 -0400 Subject: NFS: Always provide aligned buffers to the RPC read layers Instead of messing around with XDR padding in the RDMA layer, we should just give the RPC layer an aligned buffer. Try to avoid creating extra RPC calls by aligning to the smaller value of ALIGN(len, rsize) and PAGE_SIZE. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/read.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9f39e0a1a38b..08d6cc57cbc3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -293,15 +293,19 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = data; + struct inode *inode = page_file_mapping(page)->host; + unsigned int rsize = NFS_SERVER(inode)->rsize; struct nfs_page *new; - unsigned int len; + unsigned int len, aligned_len; int error; len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->ctx, page, 0, len); + aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE); + + new = nfs_create_request(desc->ctx, page, 0, aligned_len); if (IS_ERR(new)) goto out_error; -- cgit v1.2.3