From 8cb7f74eeeb5441811d93f94b6138d4a5a9d8b20 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:23:14 -0400 Subject: NFS: nfs_parsed_mount_options can use unsigned int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/nfs/super.c: In function ‘nfs_compare_remount_data’: fs/nfs/super.c:2042:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2043:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2044:20: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2046:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2047:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2048:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2049:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2050:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Seen with gcc (GCC) 4.6.3 20120306 (Red Hat 4.6.3-2). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 31fdb03225cd..89560be07e4a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -101,11 +101,11 @@ struct nfs_client_initdata { */ struct nfs_parsed_mount_data { int flags; - int rsize, wsize; - int timeo, retrans; - int acregmin, acregmax, + unsigned int rsize, wsize; + unsigned int timeo, retrans; + unsigned int acregmin, acregmax, acdirmin, acdirmax; - int namlen; + unsigned int namlen; unsigned int options; unsigned int bsize; unsigned int auth_flavor_len; -- cgit v1.2.3 From 05f4c350ee02e9461c6ae3a880ea326a06835e37 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:32 -0400 Subject: NFS: Discover NFSv4 server trunking when mounting "Server trunking" is a fancy named for a multi-homed NFS server. Trunking might occur if a client sends NFS requests for a single workload to multiple network interfaces on the same server. There are some implications for NFSv4 state management that make it useful for a client to know if a single NFSv4 server instance is multi-homed. (Note this is only a consideration for NFSv4, not for legacy versions of NFS, which are stateless). If a client cares about server trunking, no NFSv4 operations can proceed until that client determines who it is talking to. Thus server IP trunking discovery must be done when the client first encounters an unfamiliar server IP address. The nfs_get_client() function walks the nfs_client_list and matches on server IP address. The outcome of that walk tells us immediately if we have an unfamiliar server IP address. It invokes nfs_init_client() in this case. Thus, nfs4_init_client() is a good spot to perform trunking discovery. Discovery requires a client to establish a fresh client ID, so our client will now send SETCLIENTID or EXCHANGE_ID as the first NFS operation after a successful ping, rather than waiting for an application to perform an operation that requires NFSv4 state. The exact process for detecting trunking is different for NFSv4.0 and NFSv4.1, so a minorversion-specific init_client callout method is introduced. CLID_INUSE recovery is important for the trunking discovery process. CLID_INUSE is a sign the server recognizes the client's nfs_client_id4 id string, but the client is using the wrong principal this time for the SETCLIENTID operation. The SETCLIENTID must be retried with a series of different principals until one works, and then the rest of trunking discovery can proceed. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +- fs/nfs/internal.h | 6 ++ fs/nfs/nfs4_fs.h | 8 ++ fs/nfs/nfs4client.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 4 + fs/nfs/nfs4state.c | 182 ++++++++++++++++++++++++++++++++- include/linux/nfs_fs_sb.h | 1 + 7 files changed, 455 insertions(+), 2 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 92aed2e08bd5..57d2a5c3d933 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -498,7 +498,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, return nfs_found_client(cl_init, clp); } if (new) { - list_add(&new->cl_share_link, &nn->nfs_client_list); + list_add_tail(&new->cl_share_link, + &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; return rpc_ops->init_client(new, timeparms, ip_addr, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 89560be07e4a..89a795dc3027 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -483,6 +483,12 @@ extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply); +extern int nfs40_walk_client_list(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred); +extern int nfs41_walk_client_list(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred); /* * Determine the device name as a string diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9cacc131a8a4..832503c7a00e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -191,6 +191,8 @@ struct nfs4_state_recovery_ops { int (*establish_clid)(struct nfs_client *, struct rpc_cred *); struct rpc_cred * (*get_clid_cred)(struct nfs_client *); int (*reclaim_complete)(struct nfs_client *); + int (*detect_trunking)(struct nfs_client *, struct nfs_client **, + struct rpc_cred *); }; struct nfs4_state_maintenance_ops { @@ -320,9 +322,15 @@ extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **); +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); #if defined(CONFIG_NFS_V4_1) struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 612f5ebaabac..14ddd4d30966 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -185,6 +185,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, rpc_authflavor_t authflavour) { char buf[INET6_ADDRSTRLEN + 1]; + struct nfs_client *old; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -230,6 +231,17 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, if (!nfs4_has_session(clp)) nfs_mark_client_ready(clp, NFS_CS_READY); + + error = nfs4_discover_server_trunking(clp, &old); + if (error < 0) + goto error; + if (clp != old) { + clp->cl_preserve_clid = true; + nfs_put_client(clp); + clp = old; + atomic_inc(&clp->cl_count); + } + return clp; error: @@ -239,6 +251,247 @@ error: return ERR_PTR(error); } +/* + * Returns true if the client IDs match + */ +static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) +{ + if (a->cl_clientid != b->cl_clientid) { + dprintk("NFS: --> %s client ID %llx does not match %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return false; + } + dprintk("NFS: --> %s client ID %llx matches %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return true; +} + +/* + * SETCLIENTID just did a callback update with the callback ident in + * "drop," but server trunking discovery claims "drop" and "keep" are + * actually the same server. Swap the callback IDs so that "keep" + * will continue to use the callback ident the server now knows about, + * and so that "keep"'s original callback ident is destroyed when + * "drop" is freed. + */ +static void nfs4_swap_callback_idents(struct nfs_client *keep, + struct nfs_client *drop) +{ + struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id); + unsigned int save = keep->cl_cb_ident; + + if (keep->cl_cb_ident == drop->cl_cb_ident) + return; + + dprintk("%s: keeping callback ident %u and dropping ident %u\n", + __func__, keep->cl_cb_ident, drop->cl_cb_ident); + + spin_lock(&nn->nfs_client_lock); + + idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident); + keep->cl_cb_ident = drop->cl_cb_ident; + + idr_replace(&nn->cb_ident_idr, drop, save); + drop->cl_cb_ident = save; + + spin_unlock(&nn->nfs_client_lock); +} + +/** + * nfs40_walk_client_list - Find server that recognizes a client ID + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs40_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. + */ +int nfs40_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + struct nfs4_setclientid_res clid = { + .clientid = new->cl_clientid, + .confirm = new->cl_confirm, + }; + int status; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos" */ + if (pos->cl_cons_state < NFS_CS_READY) + continue; + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (pos->cl_clientid != new->cl_clientid) + continue; + + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + + status = nfs4_proc_setclientid_confirm(pos, &clid, cred); + if (status == 0) { + nfs4_swap_callback_idents(pos, new); + + nfs_put_client(pos); + *result = pos; + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + return 0; + } + if (status != -NFS4ERR_STALE_CLIENTID) { + nfs_put_client(pos); + dprintk("NFS: <-- %s status = %d, no result\n", + __func__, status); + return status; + } + + spin_lock(&nn->nfs_client_lock); + prev = pos; + } + + /* + * No matching nfs_client found. This should be impossible, + * because the new nfs_client has already been added to + * nfs_client_list by nfs_get_client(). + * + * Don't BUG(), since the caller is holding a mutex. + */ + if (prev) + nfs_put_client(prev); + spin_unlock(&nn->nfs_client_lock); + pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); + return -NFS4ERR_STALE_CLIENTID; +} + +#ifdef CONFIG_NFS_V4_1 +/* + * Returns true if the server owners match + */ +static bool +nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) +{ + struct nfs41_server_owner *o1 = a->cl_serverowner; + struct nfs41_server_owner *o2 = b->cl_serverowner; + + if (o1->minor_id != o2->minor_id) { + dprintk("NFS: --> %s server owner minor IDs do not match\n", + __func__); + return false; + } + + if (o1->major_id_sz != o2->major_id_sz) + goto out_major_mismatch; + if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) + goto out_major_mismatch; + + dprintk("NFS: --> %s server owners match\n", __func__); + return true; + +out_major_mismatch: + dprintk("NFS: --> %s server owner major IDs do not match\n", + __func__); + return false; +} + +/** + * nfs41_walk_client_list - Find nfs_client that matches a client/server owner + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs41_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. + */ +int nfs41_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + int error; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos", especially the client + * ID and serverowner fields. Wait for CREATE_SESSION + * to finish. */ + if (pos->cl_cons_state < NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + error = nfs_wait_client_init_complete(pos); + if (error < 0) { + nfs_put_client(pos); + continue; + } + + spin_lock(&nn->nfs_client_lock); + } + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (!nfs4_match_clientids(pos, new)) + continue; + + if (!nfs4_match_serverowners(pos, new)) + continue; + + spin_unlock(&nn->nfs_client_lock); + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + + *result = pos; + return 0; + } + + /* + * No matching nfs_client found. This should be impossible, + * because the new nfs_client has already been added to + * nfs_client_list by nfs_get_client(). + * + * Don't BUG(), since the caller is holding a mutex. + */ + spin_unlock(&nn->nfs_client_lock); + pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); + return -NFS4ERR_STALE_CLIENTID; +} +#endif /* CONFIG_NFS_V4_1 */ + static void nfs4_destroy_server(struct nfs_server *server) { nfs_server_return_all_delegations(server); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 461411171966..b5834abfcbff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5458,6 +5458,8 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; if (clp->cl_exchange_flags == 0) goto out; + if (clp->cl_preserve_clid) + goto out; cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); if (cred) @@ -6871,6 +6873,7 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs4_init_clientid, .get_clid_cred = nfs4_get_setclientid_cred, + .detect_trunking = nfs40_discover_server_trunking, }; #if defined(CONFIG_NFS_V4_1) @@ -6882,6 +6885,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, .reclaim_complete = nfs41_proc_reclaim_complete, + .detect_trunking = nfs41_discover_server_trunking, }; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 38eeefd95375..5c4286643701 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -51,6 +51,8 @@ #include #include +#include + #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -63,7 +65,7 @@ #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; - +static DEFINE_MUTEX(nfs_clid_init_mutex); static LIST_HEAD(nfs4_clientid_list); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) @@ -98,6 +100,55 @@ out: return status; } +/** + * nfs40_discover_server_trunking - Detect server IP address trunking (mv0) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. + */ +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs4_setclientid_res clid = { + .clientid = clp->cl_clientid, + .confirm = clp->cl_confirm, + }; + unsigned short port; + int status; + + port = nfs_callback_tcpport; + if (clp->cl_addr.ss_family == AF_INET6) + port = nfs_callback_tcpport6; + + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); + if (status != 0) + goto out; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; + + status = nfs40_walk_client_list(clp, result, cred); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + case 0: + /* Sustain the lease, even if it's empty. If the clientid4 + * goes stale it's of no use for trunking discovery. */ + nfs4_schedule_state_renewal(*result); + break; + } + +out: + return status; +} + struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) { struct rpc_cred *cred = NULL; @@ -277,6 +328,32 @@ out: return status; } +/** + * nfs41_discover_server_trunking - Detect server IP address trunking (mv1) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status. + * If NFS4_OK is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. + */ +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + int status; + + status = nfs4_proc_exchange_id(clp, cred); + if (status != NFS4_OK) + return status; + + return nfs41_walk_client_list(clp, result, cred); +} + struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) { struct rpc_cred *cred; @@ -1705,6 +1782,109 @@ static int nfs4_purge_lease(struct nfs_client *clp) return 0; } +/** + * nfs4_discover_server_trunking - Detect server IP address trunking + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * + * Returns zero or a negative errno. If zero is returned, + * an nfs_client pointer is planted in "result". + * + * Note: since we are invoked in process context, and + * not from inside the state manager, we cannot use + * nfs4_handle_reclaim_lease_error(). + */ +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result) +{ + const struct nfs4_state_recovery_ops *ops = + clp->cl_mvops->reboot_recovery_ops; + rpc_authflavor_t *flavors, flav, save; + struct rpc_clnt *clnt; + struct rpc_cred *cred; + int i, len, status; + + dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); + + len = NFS_MAX_SECFLAVORS; + flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); + if (flavors == NULL) { + status = -ENOMEM; + goto out; + } + len = rpcauth_list_flavors(flavors, len); + if (len < 0) { + status = len; + goto out_free; + } + clnt = clp->cl_rpcclient; + save = clnt->cl_auth->au_flavor; + i = 0; + + mutex_lock(&nfs_clid_init_mutex); + status = -ENOENT; +again: + cred = ops->get_clid_cred(clp); + if (cred == NULL) + goto out_unlock; + + status = ops->detect_trunking(clp, result, cred); + put_rpccred(cred); + switch (status) { + case 0: + break; + + case -EACCES: + if (clp->cl_machine_cred == NULL) + break; + /* Handle case where the user hasn't set up machine creds */ + nfs4_clear_machine_cred(clp); + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EAGAIN: + ssleep(1); + dprintk("NFS: %s after status %d, retrying\n", + __func__, status); + goto again; + + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_WRONGSEC: + status = -EPERM; + if (i >= len) + break; + + flav = flavors[i++]; + if (flav == save) + flav = flavors[i++]; + clnt = rpc_clone_client_set_auth(clnt, flav); + if (IS_ERR(clnt)) { + status = PTR_ERR(clnt); + break; + } + clp->cl_rpcclient = clnt; + goto again; + + case -NFS4ERR_MINOR_VERS_MISMATCH: + status = -EPROTONOSUPPORT; + break; + + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + status = -EKEYEXPIRED; + } + +out_unlock: + mutex_unlock(&nfs_clid_init_mutex); +out_free: + kfree(flavors); +out: + dprintk("NFS: %s: status = %d\n", __func__, status); + return status; +} + #ifdef CONFIG_NFS_V4_1 void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2e22fc7e47cf..a9e76ee1adca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -82,6 +82,7 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* shared session */ + bool cl_preserve_clid; struct nfs41_server_owner *cl_serverowner; struct nfs41_server_scope *cl_serverscope; struct nfs41_impl_id *cl_implid; -- cgit v1.2.3 From 6296556f0b31eaff29f2a3aee2c17b7eae895b98 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 25 Sep 2012 14:55:57 +0800 Subject: NFS41: send real write size in layoutget For buffer write, block layout client scan inode mapping to find next hole and use offset-to-hole as layoutget length. Object layout client uses offset-to-isize as layoutget length. For direct write, both block layout and object layout use dreq->bytes_left. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 38 +++++++++++++++++++++++++++++++++++--- fs/nfs/direct.c | 7 +++++++ fs/nfs/internal.h | 1 + fs/nfs/objlayout/objio_osd.c | 9 ++++++++- fs/nfs/pnfs.c | 6 ++++-- fs/nfs/pnfs.h | 3 ++- 6 files changed, 57 insertions(+), 7 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index a34014a7f9a5..f1027b06a1a9 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -37,6 +37,7 @@ #include /* struct bio */ #include /* various write calls */ #include +#include #include "../pnfs.h" #include "../internal.h" @@ -1204,14 +1205,45 @@ bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return pnfs_generic_pg_test(pgio, prev, req); } +/* + * Return the number of contiguous bytes for a given inode + * starting at page frame idx. + */ +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) +{ + struct address_space *mapping = inode->i_mapping; + pgoff_t end; + + /* Optimize common case that writes from 0 to end of file */ + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + if (end != NFS_I(inode)->npages) { + rcu_read_lock(); + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); + rcu_read_unlock(); + } + + if (!end) + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); + else + return (end - idx) << PAGE_CACHE_SHIFT; +} + static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { if (pgio->pg_dreq != NULL && - !is_aligned_req(req, PAGE_CACHE_SIZE)) + !is_aligned_req(req, PAGE_CACHE_SIZE)) { nfs_pageio_reset_write_mds(pgio); - else - pnfs_generic_pg_init_write(pgio, req); + } else { + u64 wb_size; + if (pgio->pg_dreq == NULL) + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, + req->wb_index); + else + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pnfs_generic_pg_init_write(pgio, req, wb_size); + } } static bool diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4be8673ee18d..cae26cbd59ee 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +{ + return dreq->bytes_left; +} +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); + /* * Collects and returns the final error value/byte-count. */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 89a795dc3027..59b133c5d652 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) { inode_dio_wait(inode); } +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index ea6d111b03e9..be731e6b7b9c 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -41,6 +41,7 @@ #include #include "objlayout.h" +#include "../internal.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -606,8 +607,14 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { unsigned long stripe_end = 0; + u64 wb_size; - pnfs_generic_pg_init_write(pgio, req); + if (pgio->pg_dreq == NULL) + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pnfs_generic_pg_init_write(pgio, req, wb_size); if (unlikely(pgio->pg_lseg == NULL)) return; /* Not pNFS */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bd9769296e4e..9a2bcce45282 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1240,7 +1240,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); void -pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req, u64 wb_size) { BUG_ON(pgio->pg_lseg != NULL); @@ -1248,10 +1249,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * nfs_pageio_reset_write_mds(pgio); return; } + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + wb_size, IOMODE_RW, GFP_NOFS); /* If no lseg, fall back to write through mds */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6cede2c6c961..2d722dba1111 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -186,7 +186,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); -void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); +void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req, u64 wb_size); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); -- cgit v1.2.3