diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-04-04 13:12:31 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-04-04 13:12:31 +1100 |
commit | 1e5f29d953f092efb1870ae025a71b3ebfc8ccbc (patch) | |
tree | 0a5a522b7dd9e95a75e19c826f1774c9b1f19bea | |
parent | 735306ba119ae29feeba534c6d71429ced0bb684 (diff) | |
parent | 5d34d13932cf0cd99b600e66f0ee1d52ebd57c7d (diff) |
Merge commit 'nfs/linux-next'
37 files changed, 888 insertions, 498 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index b76df2165dec..137b584acfcf 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1638,30 +1638,6 @@ config NFS_V4 If unsure, say N. -config NFS_DIRECTIO - bool "Allow direct I/O on NFS files" - depends on NFS_FS - help - This option enables applications to perform uncached I/O on files - in NFS file systems using the O_DIRECT open() flag. When O_DIRECT - is set for a file, its data is not cached in the system's page - cache. Data is moved to and from user-level application buffers - directly. Unlike local disk-based file systems, NFS O_DIRECT has - no alignment restrictions. - - Unless your program is designed to use O_DIRECT properly, you are - much better off allowing the NFS client to manage data caching for - you. Misusing O_DIRECT can cause poor server performance or network - storms. This kernel build option defaults OFF to avoid exposing - system administrators unwittingly to a potentially hazardous - feature. - - For more details on NFS O_DIRECT, see fs/nfs/direct.c. - - If unsure, say N. This reduces the size of the NFS client, and - causes open() to return EINVAL if a file residing in NFS is - opened with the O_DIRECT flag. - config NFSD tristate "NFS server support" depends on INET @@ -1782,15 +1758,33 @@ config SUNRPC_XPRT_RDMA tristate depends on SUNRPC && INFINIBAND && EXPERIMENTAL default SUNRPC && INFINIBAND + help + This option enables an RPC client transport capability that + allows the NFS client to mount servers via an RDMA-enabled + transport. + + To compile RPC client RDMA transport support as a module, + choose M here: the module will be called xprtrdma. + + If unsure, say N. config SUNRPC_BIND34 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL + default n help - Provides kernel support for querying rpcbind servers via versions 3 - and 4 of the rpcbind protocol. The kernel automatically falls back - to version 2 if a remote rpcbind service does not support versions - 3 or 4. + RPC requests over IPv6 networks require support for larger + addresses when performing an RPC bind. Sun added support for + IPv6 addressing by creating two new versions of the rpcbind + protocol (RFC 1833). + + This option enables support in the kernel RPC client for + querying rpcbind servers via versions 3 and 4 of the rpcbind + protocol. The kernel automatically falls back to version 2 + if a remote rpcbind service does not support versions 3 or 4. + By themselves, these new versions do not provide support for + RPC over IPv6, but the new protocol versions are necessary to + support it. If unsure, say N to get traditional behavior (version 2 rpcbind requests only). @@ -1804,12 +1798,13 @@ config RPCSEC_GSS_KRB5 select CRYPTO_DES select CRYPTO_CBC help - Provides for secure RPC calls by means of a gss-api - mechanism based on Kerberos V5. This is required for - NFSv4. + Choose Y here to enable Secure RPC using the Kerberos version 5 + GSS-API mechanism (RFC 1964). - Note: Requires an auxiliary userspace daemon which may be found on - http://www.citi.umich.edu/projects/nfsv4/ + Secure RPC calls with Kerberos require an auxiliary user-space + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. In addition, user-space + Kerberos support should be installed. If unsure, say N. @@ -1823,11 +1818,12 @@ config RPCSEC_GSS_SPKM3 select CRYPTO_CAST5 select CRYPTO_CBC help - Provides for secure RPC calls by means of a gss-api - mechanism based on the SPKM3 public-key mechanism. + Choose Y here to enable Secure RPC using the SPKM3 public key + GSS-API mechansim (RFC 2025). - Note: Requires an auxiliary userspace daemon which may be found on - http://www.citi.umich.edu/projects/nfsv4/ + Secure RPC calls with SPKM3 require an auxiliary userspace + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. If unsure, say N. diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f1ef49fff118..f23750db1650 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -42,11 +42,12 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, /* * Common host lookup routine for server & client */ -static struct nlm_host * -nlm_lookup_host(int server, const struct sockaddr_in *sin, - int proto, int version, const char *hostname, - unsigned int hostname_len, - const struct sockaddr_in *ssin) +static struct nlm_host *nlm_lookup_host(int server, + const struct sockaddr_in *sin, + int proto, u32 version, + const char *hostname, + unsigned int hostname_len, + const struct sockaddr_in *ssin) { struct hlist_head *chain; struct hlist_node *pos; @@ -55,7 +56,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, int hash; dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT - ", p=%d, v=%d, my role=%s, name=%.*s)\n", + ", p=%d, v=%u, my role=%s, name=%.*s)\n", NIPQUAD(ssin->sin_addr.s_addr), NIPQUAD(sin->sin_addr.s_addr), proto, version, server? "server" : "client", @@ -175,9 +176,10 @@ nlm_destroy_host(struct nlm_host *host) /* * Find an NLM server handle in the cache. If there is none, create it. */ -struct nlm_host * -nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, - const char *hostname, unsigned int hostname_len) +struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, + int proto, u32 version, + const char *hostname, + unsigned int hostname_len) { struct sockaddr_in ssin = {0}; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 908b23fadd05..e4d563543b11 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -18,6 +18,8 @@ #define NLMDBG_FACILITY NLMDBG_MONITOR +#define XDR_ADDRBUF_LEN (20) + static struct rpc_clnt * nsm_create(void); static struct rpc_program nsm_program; @@ -147,28 +149,55 @@ nsm_create(void) /* * XDR functions for NSM. + * + * See http://www.opengroup.org/ for details on the Network + * Status Monitor wire protocol. */ -static __be32 * -xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) +static __be32 *xdr_encode_nsm_string(__be32 *p, char *string) { - char buffer[20], *name; - - /* - * Use the dotted-quad IP address of the remote host as - * identifier. Linux statd always looks up the canonical - * hostname first for whatever remote hostname it receives, - * so this works alright. - */ - if (nsm_use_hostnames) { - name = argp->mon_name; - } else { - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); + size_t len = strlen(string); + + if (len > SM_MAXSTRLEN) + len = SM_MAXSTRLEN; + return xdr_encode_opaque(p, string, len); +} + +/* + * "mon_name" specifies the host to be monitored. + * + * Linux uses a text version of the IP address of the remote + * host as the host identifier (the "mon_name" argument). + * + * Linux statd always looks up the canonical hostname first for + * whatever remote hostname it receives, so this works alright. + */ +static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp) +{ + char buffer[XDR_ADDRBUF_LEN + 1]; + char *name = argp->mon_name; + + if (!nsm_use_hostnames) { + snprintf(buffer, XDR_ADDRBUF_LEN, + NIPQUAD_FMT, NIPQUAD(argp->addr)); name = buffer; } - if (!(p = xdr_encode_string(p, name)) - || !(p = xdr_encode_string(p, utsname()->nodename))) + + return xdr_encode_nsm_string(p, name); +} + +/* + * The "my_id" argument specifies the hostname and RPC procedure + * to be called when the status manager receives notification + * (via the SM_NOTIFY call) that the state of host "mon_name" + * has changed. + */ +static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp) +{ + p = xdr_encode_nsm_string(p, utsname()->nodename); + if (!p) return ERR_PTR(-EIO); + *p++ = htonl(argp->prog); *p++ = htonl(argp->vers); *p++ = htonl(argp->proc); @@ -176,18 +205,48 @@ xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) return p; } -static int -xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) +/* + * The "mon_id" argument specifies the non-private arguments + * of an SM_MON or SM_UNMON call. + */ +static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp) { - p = xdr_encode_common(rqstp, p, argp); - if (IS_ERR(p)) - return PTR_ERR(p); + p = xdr_encode_mon_name(p, argp); + if (!p) + return ERR_PTR(-EIO); - /* Surprise - there may even be room for an IPv6 address now */ + return xdr_encode_my_id(p, argp); +} + +/* + * The "priv" argument may contain private information required + * by the SM_MON call. This information will be supplied in the + * SM_NOTIFY call. + * + * Linux provides the raw IP address of the monitored host, + * left in network byte order. + */ +static __be32 *xdr_encode_priv(__be32 *p, struct nsm_args *argp) +{ *p++ = argp->addr; *p++ = 0; *p++ = 0; *p++ = 0; + + return p; +} + +static int +xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) +{ + p = xdr_encode_mon_id(p, argp); + if (IS_ERR(p)) + return PTR_ERR(p); + + p = xdr_encode_priv(p, argp); + if (IS_ERR(p)) + return PTR_ERR(p); + rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); return 0; } @@ -195,7 +254,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) static int xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) { - p = xdr_encode_common(rqstp, p, argp); + p = xdr_encode_mon_id(p, argp); if (IS_ERR(p)) return PTR_ERR(p); rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); @@ -220,9 +279,11 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp) } #define SM_my_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN)) -#define SM_my_id_sz (3+1+SM_my_name_sz) -#define SM_mon_id_sz (1+XDR_QUADLEN(20)+SM_my_id_sz) -#define SM_mon_sz (SM_mon_id_sz+4) +#define SM_my_id_sz (SM_my_name_sz+3) +#define SM_mon_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN)) +#define SM_mon_id_sz (SM_mon_name_sz+SM_my_id_sz) +#define SM_priv_sz (XDR_QUADLEN(SM_PRIV_SIZE)) +#define SM_mon_sz (SM_mon_id_sz+SM_priv_sz) #define SM_monres_sz 2 #define SM_unmonres_sz 1 diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 1ed8bd4de941..38c2f0b1dd7d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -74,7 +74,9 @@ static const unsigned long nlm_timeout_min = 3; static const unsigned long nlm_timeout_max = 20; static const int nlm_port_min = 0, nlm_port_max = 65535; +#ifdef CONFIG_SYSCTL static struct ctl_table_header * nlm_sysctl_table; +#endif static unsigned long get_lockd_grace_period(void) { @@ -359,6 +361,8 @@ out: } EXPORT_SYMBOL(lockd_down); +#ifdef CONFIG_SYSCTL + /* * Sysctl parameters (same as module parameters, different interface). */ @@ -443,6 +447,8 @@ static ctl_table nlm_sysctl_root[] = { { .ctl_name = 0 } }; +#endif /* CONFIG_SYSCTL */ + /* * Module (and sysfs) parameters. */ @@ -516,15 +522,21 @@ module_param(nsm_use_hostnames, bool, 0644); static int __init init_nlm(void) { +#ifdef CONFIG_SYSCTL nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); return nlm_sysctl_table ? 0 : -ENOMEM; +#else + return 0; +#endif } static void __exit exit_nlm(void) { /* FIXME: delete all NLM clients */ nlm_shutdown_hosts(); +#ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); +#endif } module_init(init_nlm); diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index df0f41e09885..ac6170c594a3 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ - pagelist.o proc.o read.o symlink.o unlink.o \ + direct.o pagelist.o proc.o read.o symlink.o unlink.o \ write.o namespace.o mount_clnt.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o @@ -14,5 +14,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o -nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c5c0175898f6..93dfd75aba7c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -170,6 +170,8 @@ static void nfs4_shutdown_client(struct nfs_client *clp) BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); #endif } @@ -680,10 +682,22 @@ static int nfs_init_server(struct nfs_server *server, if (error < 0) goto error; + server->port = data->nfs_server.port; + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); if (error < 0) goto error; + /* Preserve the values of mount_server-related mount options */ + if (data->mount_server.addrlen) { + memcpy(&server->mountd_address, &data->mount_server.address, + data->mount_server.addrlen); + server->mountd_addrlen = data->mount_server.addrlen; + } + server->mountd_version = data->mount_server.version; + server->mountd_port = data->mount_server.port; + server->mountd_protocol = data->mount_server.protocol; + server->namelen = data->namlen; /* Create a client RPC handle for the NFSv3 ACL management interface */ nfs_init_server_aclclient(server); @@ -1062,6 +1076,8 @@ static int nfs4_init_server(struct nfs_server *server, server->acdirmin = data->acdirmin * HZ; server->acdirmax = data->acdirmax * HZ; + server->port = data->nfs_server.port; + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); error: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6cea7479c5b4..d583654a0b39 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1966,7 +1966,7 @@ force_lookup: if (!NFS_PROTO(inode)->access) goto out_notsup; - cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + cred = rpc_lookup_cred(); if (!IS_ERR(cred)) { res = nfs_do_access(inode, cred, mask); put_rpccred(cred); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 16844f98f50e..e44200579c8d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -280,6 +280,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = &nfs_read_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; unsigned int pgbase; @@ -323,7 +324,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -446,6 +447,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) struct rpc_task_setup task_setup_data = { .rpc_client = NFS_CLIENT(inode), .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -537,6 +539,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) .rpc_message = &msg, .callback_ops = &nfs_commit_direct_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -546,6 +549,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; + data->args.context = get_nfs_open_context(dreq->ctx); data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -682,6 +686,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; size_t wsize = NFS_SERVER(inode)->wsize; @@ -728,7 +733,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ef57a5ae5904..10e8b807e7ff 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -234,10 +234,8 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; size_t count = iov_length(iov, nr_segs); -#ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos); -#endif dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -383,9 +381,7 @@ const struct address_space_operations nfs_file_aops = { .write_end = nfs_write_end, .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, -#ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, -#endif .launder_page = nfs_launder_page, }; @@ -443,10 +439,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result; size_t count = iov_length(iov, nr_segs); -#ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos); -#endif dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a4c7cf2bff3a..15f787355d27 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -522,8 +522,12 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) { - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode; + if (ctx == NULL) + return; + + inode = ctx->path.dentry->d_inode; if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) return; list_del(&ctx->list); @@ -609,7 +613,7 @@ int nfs_open(struct inode *inode, struct file *filp) struct nfs_open_context *ctx; struct rpc_cred *cred; - cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); @@ -1217,6 +1221,36 @@ static void nfs_destroy_inodecache(void) kmem_cache_destroy(nfs_inode_cachep); } +struct workqueue_struct *nfsiod_workqueue; + +/* + * start up the nfsiod workqueue + */ +static int nfsiod_start(void) +{ + struct workqueue_struct *wq; + dprintk("RPC: creating workqueue nfsiod\n"); + wq = create_singlethread_workqueue("nfsiod"); + if (wq == NULL) + return -ENOMEM; + nfsiod_workqueue = wq; + return 0; +} + +/* + * Destroy the nfsiod workqueue + */ +static void nfsiod_stop(void) +{ + struct workqueue_struct *wq; + + wq = nfsiod_workqueue; + if (wq == NULL) + return; + nfsiod_workqueue = NULL; + destroy_workqueue(wq); +} + /* * Initialize NFS */ @@ -1224,6 +1258,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfsiod_start(); + if (err) + goto out6; + err = nfs_fs_proc_init(); if (err) goto out5; @@ -1270,6 +1308,8 @@ out3: out4: nfs_fs_proc_exit(); out5: + nfsiod_stop(); +out6: return err; } @@ -1285,6 +1325,7 @@ static void __exit exit_nfs_fs(void) #endif unregister_nfs_fs(); nfs_fs_proc_exit(); + nfsiod_stop(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 931992763e68..04ae867dddba 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -46,9 +46,9 @@ struct nfs_parsed_mount_data { struct sockaddr_storage address; size_t addrlen; char *hostname; - unsigned int version; + u32 version; unsigned short port; - int protocol; + unsigned short protocol; } mount_server; struct { @@ -56,7 +56,8 @@ struct nfs_parsed_mount_data { size_t addrlen; char *hostname; char *export_path; - int protocol; + unsigned short port; + unsigned short protocol; } nfs_server; struct security_mnt_opts lsm_opts; @@ -115,13 +116,8 @@ extern void nfs_destroy_readpagecache(void); extern int __init nfs_init_writepagecache(void); extern void nfs_destroy_writepagecache(void); -#ifdef CONFIG_NFS_DIRECTIO extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); -#else -#define nfs_init_directcache() (0) -#define nfs_destroy_directcache() do {} while(0) -#endif /* nfs2xdr.c */ extern int nfs_stat_to_errno(int); @@ -146,6 +142,7 @@ extern struct rpc_procinfo nfs4_procedures[]; extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); /* inode.c */ +extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *,int); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1f7ea675e0c5..86a80b33ec82 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -428,7 +428,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) size_t hdrlen; unsigned int pglen, recvd; u32 len; - int status, nr; + int status, nr = 0; __be32 *end, *entry, *kaddr; if ((status = ntohl(*p++))) @@ -452,7 +452,12 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) kaddr = p = kmap_atomic(*page, KM_USER0); end = (__be32 *)((char *)p + pglen); entry = p; - for (nr = 0; *p++; nr++) { + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { if (p + 2 > end) goto short_pkt; p++; /* fileid */ @@ -467,18 +472,32 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) goto short_pkt; entry = p; } - if (!nr && (entry[0] != 0 || entry[1] == 0)) - goto short_pkt; + + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } out: kunmap_atomic(kaddr, KM_USER0); return nr; short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ entry[0] = entry[1] = 0; - /* truncate listing ? */ - if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } + if (!nr) + nr = -errno_NFSERR_IO; goto out; err_unmap: nr = -errno_NFSERR_IO; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 3917e2fa4e40..fb03048ac650 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -508,7 +508,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res struct page **page; size_t hdrlen; u32 len, recvd, pglen; - int status, nr; + int status, nr = 0; __be32 *entry, *end, *kaddr; status = ntohl(*p++); @@ -542,7 +542,12 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res kaddr = p = kmap_atomic(*page, KM_USER0); end = (__be32 *)((char *)p + pglen); entry = p; - for (nr = 0; *p++; nr++) { + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { if (p + 3 > end) goto short_pkt; p += 2; /* inode # */ @@ -581,18 +586,32 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res goto short_pkt; entry = p; } - if (!nr && (entry[0] != 0 || entry[1] == 0)) - goto short_pkt; + + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } out: kunmap_atomic(kaddr, KM_USER0); return nr; short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ entry[0] = entry[1] = 0; - /* truncate listing ? */ - if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } + if (!nr) + nr = -errno_NFSERR_IO; goto out; err_unmap: nr = -errno_NFSERR_IO; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ce07862c2fb..f38d0573be18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,6 +51,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -773,6 +774,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) .rpc_message = &msg, .callback_ops = &nfs4_open_confirm_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status; @@ -910,6 +912,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) .rpc_message = &msg, .callback_ops = &nfs4_open_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status; @@ -1315,6 +1318,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_close_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status = -ENOMEM; @@ -1404,7 +1408,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) BUG_ON(nd->intent.open.flags & O_CREAT); } - cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return (struct dentry *)cred; parent = dentry->d_parent; @@ -1439,7 +1443,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st struct rpc_cred *cred; struct nfs4_state *state; - cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); state = nfs4_do_open(dir, &path, openflags, NULL, cred); @@ -1656,7 +1660,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); - cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); @@ -1892,7 +1896,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct rpc_cred *cred; int status = 0; - cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) { status = PTR_ERR(cred); goto out; @@ -2761,10 +2765,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); nfs4_schedule_state_recovery(clp); if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); task->tk_status = 0; return -EAGAIN; case -NFS4ERR_DELAY: @@ -3235,6 +3239,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .rpc_client = NFS_CLIENT(lsp->ls_state->inode), .rpc_message = &msg, .callback_ops = &nfs4_locku_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -3419,6 +3424,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f .rpc_client = NFS_CLIENT(state->inode), .rpc_message = &msg, .callback_ops = &nfs4_lock_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int ret; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b962397004c1..7775435ea7a5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -292,8 +292,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct spin_unlock(&clp->cl_lock); if (sp == new) get_rpccred(cred); - else + else { + rpc_destroy_wait_queue(&new->so_sequence.wait); kfree(new); + } return sp; } @@ -310,6 +312,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) return; nfs4_remove_state_owner(clp, sp); spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&sp->so_sequence.wait); put_rpccred(cred); kfree(sp); } @@ -529,6 +532,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) spin_lock(&clp->cl_lock); nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&lsp->ls_sequence.wait); kfree(lsp); } @@ -731,7 +735,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) list_add_tail(&seqid->list, &sequence->list); if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) goto unlock; - rpc_sleep_on(&sequence->wait, task, NULL, NULL); + rpc_sleep_on(&sequence->wait, task, NULL); status = -EAGAIN; unlock: spin_unlock(&sequence->lock); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index db1ed9c46ede..2b519f6325f9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -110,7 +110,7 @@ static int nfs4_stat_to_errno(int); #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_restorefh_maxsz (op_encode_hdr_maxsz) #define decode_restorefh_maxsz (op_decode_hdr_maxsz) -#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) +#define encode_fsinfo_maxsz (encode_getattr_maxsz) #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) #define decode_renew_maxsz (op_decode_hdr_maxsz) @@ -3481,7 +3481,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n size_t hdrlen; u32 recvd, pglen = rcvbuf->page_len; __be32 *end, *entry, *p, *kaddr; - unsigned int nr; + unsigned int nr = 0; int status; status = decode_op_hdr(xdr, OP_READDIR); @@ -3505,7 +3505,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n kaddr = p = kmap_atomic(page, KM_USER0); end = p + ((pglen + readdir->pgbase) >> 2); entry = p; - for (nr = 0; *p++; nr++) { + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { u32 len, attrlen, xlen; if (end - p < 3) goto short_pkt; @@ -3532,20 +3537,32 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n p += attrlen; /* attributes */ entry = p; } - if (!nr && (entry[0] != 0 || entry[1] == 0)) - goto short_pkt; + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } out: kunmap_atomic(kaddr, KM_USER0); return 0; short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr); entry[0] = entry[1] = 0; - /* truncate listing ? */ - if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } - goto out; + if (nr) + goto out; err_unmap: kunmap_atomic(kaddr, KM_USER0); return -errno_NFSERR_IO; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 5a70be589bbe..d333f5fedca1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -58,22 +58,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -static void nfs_readdata_rcu_free(struct rcu_head *head) +static void nfs_readdata_free(struct nfs_read_data *p) { - struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } -static void nfs_readdata_free(struct nfs_read_data *rdata) -{ - call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free); -} - void nfs_readdata_release(void *data) { - nfs_readdata_free(data); + struct nfs_read_data *rdata = data; + + put_nfs_open_context(rdata->args.context); + nfs_readdata_free(rdata); } static @@ -174,6 +171,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, .rpc_message = &msg, .callback_ops = call_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | swap_flags, }; @@ -186,7 +184,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.context = req->wb_context; + data->args.context = get_nfs_open_context(req->wb_context); data->res.fattr = &data->fattr; data->res.count = count; @@ -253,7 +251,6 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne data = nfs_readdata_alloc(1); if (!data) goto out_bad; - INIT_LIST_HEAD(&data->pages); list_add(&data->pages, &list); requests++; nbytes -= len; @@ -300,7 +297,6 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned if (!data) goto out_bad; - INIT_LIST_HEAD(&data->pages); pages = data->pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f9219024f31a..140174d9b62d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -441,10 +441,52 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) return sec_flavours[i].str; } +static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address; + + switch (sap->sa_family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + seq_printf(m, ",mountaddr=" NIPQUAD_FMT, + NIPQUAD(sin->sin_addr.s_addr)); + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + seq_printf(m, ",mountaddr=" NIP6_FMT, + NIP6(sin6->sin6_addr)); + break; + } + default: + if (showdefaults) + seq_printf(m, ",mountaddr=unspecified"); + } + + if (nfss->mountd_version || showdefaults) + seq_printf(m, ",mountvers=%u", nfss->mountd_version); + if (nfss->mountd_port || showdefaults) + seq_printf(m, ",mountport=%u", nfss->mountd_port); + + switch (nfss->mountd_protocol) { + case IPPROTO_UDP: + seq_printf(m, ",mountproto=udp"); + break; + case IPPROTO_TCP: + seq_printf(m, ",mountproto=tcp"); + break; + default: + if (showdefaults) + seq_printf(m, ",mountproto=auto"); + } +} + /* * Describe the mount options in force on this server representation */ -static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) { static const struct proc_nfs_info { int flag; @@ -452,6 +494,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", ",nointr" }, + { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, @@ -462,18 +506,22 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, }; const struct proc_nfs_info *nfs_infop; struct nfs_client *clp = nfss->nfs_client; - - seq_printf(m, ",vers=%d", clp->rpc_ops->version); - seq_printf(m, ",rsize=%d", nfss->rsize); - seq_printf(m, ",wsize=%d", nfss->wsize); + u32 version = clp->rpc_ops->version; + + seq_printf(m, ",vers=%u", version); + seq_printf(m, ",rsize=%u", nfss->rsize); + seq_printf(m, ",wsize=%u", nfss->wsize); + if (nfss->bsize != 0) + seq_printf(m, ",bsize=%u", nfss->bsize); + seq_printf(m, ",namlen=%u", nfss->namelen); if (nfss->acregmin != 3*HZ || showdefaults) - seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); + seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); if (nfss->acregmax != 60*HZ || showdefaults) - seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); if (nfss->acdirmin != 30*HZ || showdefaults) - seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); if (nfss->acdirmax != 60*HZ || showdefaults) - seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) seq_puts(m, nfs_infop->str); @@ -482,9 +530,24 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, } seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); + if (version == 4) { + if (nfss->port != NFS_PORT) + seq_printf(m, ",port=%u", nfss->port); + } else + if (nfss->port) + seq_printf(m, ",port=%u", nfss->port); + seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); + + if (version != 4) + nfs_show_mountd_options(m, nfss, showdefaults); + +#ifdef CONFIG_NFS_V4 + if (clp->rpc_ops->version == 4) + seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); +#endif } /* @@ -529,10 +592,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); - seq_printf(m, ",wtmult=%d", nfss->wtmult); - seq_printf(m, ",dtsize=%d", nfss->dtsize); - seq_printf(m, ",bsize=%d", nfss->bsize); - seq_printf(m, ",namelen=%d", nfss->namelen); + seq_printf(m, ",wtmult=%u", nfss->wtmult); + seq_printf(m, ",dtsize=%u", nfss->dtsize); + seq_printf(m, ",bsize=%u", nfss->bsize); + seq_printf(m, ",namlen=%u", nfss->namelen); #ifdef CONFIG_NFS_V4 if (nfss->nfs_client->rpc_ops->version == 4) { @@ -546,9 +609,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) /* * Display security flavor in effect for this mount */ - seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + seq_printf(m, "\n\tsec:\tflavor=%u", auth->au_ops->au_flavor); if (auth->au_flavor) - seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + seq_printf(m, ",pseudoflavor=%u", auth->au_flavor); /* * Display superblock I/O counters @@ -683,7 +746,6 @@ static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { char *p, *string, *secdata; - unsigned short port = 0; int rc; if (!raw) { @@ -798,7 +860,7 @@ static int nfs_parse_mount_options(char *raw, return 0; if (option < 0 || option > 65535) return 0; - port = option; + mnt->nfs_server.port = option; break; case Opt_rsize: if (match_int(args, &mnt->rsize)) @@ -1048,7 +1110,8 @@ static int nfs_parse_mount_options(char *raw, } } - nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port); + nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, + mnt->nfs_server.port); return 1; @@ -1169,7 +1232,9 @@ static int nfs_validate_mount_data(void *options, args->acregmax = 60; args->acdirmin = 30; args->acdirmax = 60; + args->mount_server.port = 0; /* autobind unless user sets port */ args->mount_server.protocol = XPRT_TRANSPORT_UDP; + args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; switch (data->version) { @@ -1706,28 +1771,6 @@ static void nfs4_fill_super(struct super_block *sb) } /* - * If the user didn't specify a port, set the port number to - * the NFS version 4 default port. - */ -static void nfs4_default_port(struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: { - struct sockaddr_in *ap = (struct sockaddr_in *)sap; - if (ap->sin_port == 0) - ap->sin_port = htons(NFS_PORT); - break; - } - case AF_INET6: { - struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; - if (ap->sin6_port == 0) - ap->sin6_port = htons(NFS_PORT); - break; - } - } -} - -/* * Validate NFSv4 mount options */ static int nfs4_validate_mount_data(void *options, @@ -1751,6 +1794,7 @@ static int nfs4_validate_mount_data(void *options, args->acregmax = 60; args->acdirmin = 30; args->acdirmax = 60; + args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; switch (data->version) { @@ -1767,9 +1811,6 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - nfs4_default_port((struct sockaddr *) - &args->nfs_server.address); - switch (data->auth_flavourlen) { case 0: args->auth_flavors[0] = RPC_AUTH_UNIX; @@ -1827,9 +1868,6 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; - nfs4_default_port((struct sockaddr *) - &args->nfs_server.address); - switch (args->auth_flavor_len) { case 0: args->auth_flavors[0] = RPC_AUTH_UNIX; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 757415363422..3adf8b266461 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -234,7 +234,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) if (data == NULL) goto out; - data->cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + data->cred = rpc_lookup_cred(); if (IS_ERR(data->cred)) { status = PTR_ERR(data->cred); goto out_free; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bed63416a55b..ce40cadb15db 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -59,19 +59,13 @@ struct nfs_write_data *nfs_commit_alloc(void) return p; } -static void nfs_commit_rcu_free(struct rcu_head *head) +void nfs_commit_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } -void nfs_commit_free(struct nfs_write_data *wdata) -{ - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); -} - struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); @@ -93,21 +87,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -static void nfs_writedata_rcu_free(struct rcu_head *head) +static void nfs_writedata_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_free(struct nfs_write_data *wdata) +void nfs_writedata_release(void *data) { - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free); -} + struct nfs_write_data *wdata = data; -void nfs_writedata_release(void *wdata) -{ + put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -291,8 +282,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, spin_unlock(&inode->i_lock); if (!nfs_pageio_add_request(pgio, req)) { nfs_redirty_request(req); - nfs_end_page_writeback(page); - nfs_clear_page_tag_locked(req); return pgio->pg_error; } return 0; @@ -366,15 +355,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) /* * Insert a write request into an inode */ -static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); int error; error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); - BUG_ON(error == -EEXIST); - if (error) - return error; + BUG_ON(error); if (!nfsi->npages) { igrab(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) @@ -384,8 +371,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; kref_get(&req->wb_kref); - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, + NFS_PAGE_TAG_LOCKED); } /* @@ -413,7 +400,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } static void -nfs_redirty_request(struct nfs_page *req) +nfs_mark_request_dirty(struct nfs_page *req) { __set_page_dirty_nobuffers(req->wb_page); } @@ -467,7 +454,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) return 1; } if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { - nfs_redirty_request(req); + nfs_mark_request_dirty(req); return 1; } return 0; @@ -597,6 +584,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, /* Loop over all inode entries and see if we find * A request for the page we wish to update */ + if (new) { + if (radix_tree_preload(GFP_NOFS)) { + nfs_release_request(new); + return ERR_PTR(-ENOMEM); + } + } + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { @@ -607,28 +601,27 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) { - if (new) + if (new) { + radix_tree_preload_end(); nfs_release_request(new); + } return ERR_PTR(error); } continue; } spin_unlock(&inode->i_lock); - if (new) + if (new) { + radix_tree_preload_end(); nfs_release_request(new); + } break; } if (new) { - int error; nfs_lock_request_dontget(new); - error = nfs_inode_add_request(inode, new); - if (error) { - spin_unlock(&inode->i_lock); - nfs_unlock_request(new); - return ERR_PTR(error); - } + nfs_inode_add_request(inode, new); spin_unlock(&inode->i_lock); + radix_tree_preload_end(); req = new; goto zero_page; } @@ -806,6 +799,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, .rpc_message = &msg, .callback_ops = call_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = flags, .priority = priority, }; @@ -822,7 +816,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.context = req->wb_context; + data->args.context = get_nfs_open_context(req->wb_context); data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { data->args.stable = NFS_DATA_SYNC; @@ -851,6 +845,17 @@ static void nfs_write_rpcsetup(struct nfs_page *req, rpc_put_task(task); } +/* If a nfs_flush_* function fails, it should remove reqs from @head and + * call this on each, which will prepare them to be retried on next + * writeback using standard nfs. + */ +static void nfs_redirty_request(struct nfs_page *req) +{ + nfs_mark_request_dirty(req); + nfs_end_page_writeback(req->wb_page); + nfs_clear_page_tag_locked(req); +} + /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. @@ -906,8 +911,6 @@ out_bad: nfs_writedata_release(data); } nfs_redirty_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_clear_page_tag_locked(req); return -ENOMEM; } @@ -948,8 +951,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -1159,8 +1160,11 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -void nfs_commit_release(void *wdata) +void nfs_commit_release(void *data) { + struct nfs_write_data *wdata = data; + + put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } @@ -1187,6 +1191,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, .rpc_message = &msg, .callback_ops = &nfs_commit_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = flags, .priority = priority, }; @@ -1203,6 +1208,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, /* Note: we always request a commit of the entire inode */ data->args.offset = 0; data->args.count = 0; + data->args.context = get_nfs_open_context(first->wb_context); data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1297,7 +1303,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) } /* We have a mismatch. Write the page again */ dprintk(" mismatch\n"); - nfs_redirty_request(req); + nfs_mark_request_dirty(req); next: nfs_clear_page_tag_locked(req); } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 4babb2a129ac..acf39e1e3a3d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -173,8 +173,10 @@ void nlmclnt_next_cookie(struct nlm_cookie *); /* * Host cache */ -struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *, int, int, - const char *, unsigned int); +struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, + int proto, u32 version, + const char *hostname, + unsigned int hostname_len); struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *, unsigned int); struct rpc_clnt * nlm_bind_host(struct nlm_host *); @@ -217,8 +219,7 @@ void nlmsvc_mark_resources(void); void nlmsvc_free_host_resources(struct nlm_host *); void nlmsvc_invalidate_all(void); -static __inline__ struct inode * -nlmsvc_file_inode(struct nlm_file *file) +static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { return file->f_file->f_path.dentry->d_inode; } @@ -226,8 +227,8 @@ nlmsvc_file_inode(struct nlm_file *file) /* * Compare two host addresses (needs modifying for ipv6) */ -static __inline__ int -nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2) +static inline int nlm_cmp_addr(const struct sockaddr_in *sin1, + const struct sockaddr_in *sin2) { return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } @@ -236,8 +237,8 @@ nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2) * Compare two NLM locks. * When the second lock is of type F_UNLCK, this acts like a wildcard. */ -static __inline__ int -nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) +static inline int nlm_compare_locks(const struct file_lock *fl1, + const struct file_lock *fl2) { return fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 22a645828f26..5a5448bdb17d 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -19,6 +19,7 @@ #define SM_NOTIFY 6 #define SM_MAXSTRLEN 1024 +#define SM_PRIV_SIZE 16 /* * Arguments for all calls to statd diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3423c6761bf7..ac7e4fb943ea 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -93,6 +93,7 @@ struct nfs_server { unsigned int wpages; /* write size (in pages) */ unsigned int wtmult; /* server disk block size */ unsigned int dtsize; /* readdir size */ + unsigned short port; /* "port=" setting */ unsigned int bsize; /* server block size */ unsigned int acregmin; /* attr cache timeouts */ unsigned int acregmax; @@ -117,6 +118,13 @@ struct nfs_server { atomic_t active; /* Keep trace of any activity to this server */ wait_queue_head_t active_wq; /* Wait for any activity to stop */ + + /* mountd-related mount options */ + struct sockaddr_storage mountd_address; + size_t mountd_addrlen; + u32 mountd_version; + unsigned short mountd_port; + unsigned short mountd_protocol; }; /* Server capabilities */ diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 7a69ca3bebaf..e93cd8aa3eb6 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -59,8 +59,8 @@ struct rpc_cred { /* * Client authentication handle */ -#define RPC_CREDCACHE_NR 8 -#define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) +#define RPC_CREDCACHE_HASHBITS 4 +#define RPC_CREDCACHE_NR (1 << RPC_CREDCACHE_HASHBITS) struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; spinlock_t lock; @@ -89,7 +89,6 @@ struct rpc_auth { /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ -#define RPCAUTH_LOOKUP_ROOTCREDS 0x02 /* This really ought to go! */ /* * Client authentication ops @@ -113,6 +112,7 @@ struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); + void (*crbind)(struct rpc_task *, struct rpc_cred *); __be32 * (*crmarshal)(struct rpc_task *, __be32 *); int (*crrefresh)(struct rpc_task *); __be32 * (*crvalidate)(struct rpc_task *, __be32 *); @@ -126,9 +126,12 @@ extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; void __init rpc_init_authunix(void); +void __init rpc_init_generic_auth(void); void __init rpcauth_init_module(void); void __exit rpcauth_remove_module(void); +void __exit rpc_destroy_generic_auth(void); +struct rpc_cred * rpc_lookup_cred(void); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); @@ -136,8 +139,8 @@ void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); -struct rpc_cred * rpcauth_bindcred(struct rpc_task *); -void rpcauth_holdcred(struct rpc_task *); +void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int); +void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *); void put_rpccred(struct rpc_cred *); void rpcauth_unbindcred(struct rpc_task *); __be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 129a86e25d29..6fff7f82ef12 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -127,11 +127,12 @@ int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); void rpc_call_start(struct rpc_task *); -int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, - int flags, const struct rpc_call_ops *tk_ops, +int rpc_call_async(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags, + const struct rpc_call_ops *tk_ops, void *calldata); -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, - int flags); +int rpc_call_sync(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags); struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags); void rpc_restart_call(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f689f02e6793..d1a5c8c1a0f1 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -11,7 +11,6 @@ #include <linux/timer.h> #include <linux/sunrpc/types.h> -#include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/wait.h> #include <linux/workqueue.h> @@ -33,7 +32,8 @@ struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ - struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ + struct list_head timer_list; /* Timer list */ + unsigned long expires; }; /* @@ -57,33 +57,25 @@ struct rpc_task { __u8 tk_cred_retry; /* - * timeout_fn to be executed by timer bottom half * callback to be executed after waking up * action next procedure for async tasks * tk_ops caller callbacks */ - void (*tk_timeout_fn)(struct rpc_task *); void (*tk_callback)(struct rpc_task *); void (*tk_action)(struct rpc_task *); const struct rpc_call_ops *tk_ops; void * tk_calldata; - /* - * tk_timer is used for async processing by the RPC scheduling - * primitives. You should not access this directly unless - * you have a pathological interest in kernel oopses. - */ - struct timer_list tk_timer; /* kernel timer */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ unsigned long tk_runstate; /* Task run status */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could * be any workqueue */ + struct rpc_wait_queue *tk_waitqueue; /* RPC wait queue we're on */ union { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ - struct rcu_head tk_rcu; /* for task deletion */ } u; unsigned short tk_timeouts; /* maj timeouts */ @@ -123,6 +115,7 @@ struct rpc_task_setup { const struct rpc_message *rpc_message; const struct rpc_call_ops *callback_ops; void *callback_data; + struct workqueue_struct *workqueue; unsigned short flags; signed char priority; }; @@ -147,9 +140,7 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 -#define RPC_TASK_WAKEUP 2 -#define RPC_TASK_HAS_TIMER 3 -#define RPC_TASK_ACTIVE 4 +#define RPC_TASK_ACTIVE 2 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) @@ -171,15 +162,6 @@ struct rpc_task_setup { smp_mb__after_clear_bit(); \ } while (0) -#define rpc_start_wakeup(t) \ - (test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0) -#define rpc_finish_wakeup(t) \ - do { \ - smp_mb__before_clear_bit(); \ - clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \ - smp_mb__after_clear_bit(); \ - } while (0) - #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) /* @@ -192,6 +174,12 @@ struct rpc_task_setup { #define RPC_PRIORITY_HIGH (1) #define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW) +struct rpc_timer { + struct timer_list timer; + struct list_head list; + unsigned long expires; +}; + /* * RPC synchronization objects */ @@ -204,6 +192,7 @@ struct rpc_wait_queue { unsigned char count; /* # task groups remaining serviced so far */ unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ + struct rpc_timer timer_list; #ifdef RPC_DEBUG const char * name; #endif @@ -229,9 +218,11 @@ void rpc_killall_tasks(struct rpc_clnt *); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); +void rpc_destroy_wait_queue(struct rpc_wait_queue *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, - rpc_action action, rpc_action timer); -void rpc_wake_up_task(struct rpc_task *); + rpc_action action); +void rpc_wake_up_queued_task(struct rpc_wait_queue *, + struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 92e1dbe50947..5369aa369b35 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ - auth.o auth_null.o auth_unix.o \ + auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index eca941ce298b..0632cd0a1ad7 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/errno.h> +#include <linux/hash.h> #include <linux/sunrpc/clnt.h> #include <linux/spinlock.h> @@ -280,10 +281,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, struct hlist_node *pos; struct rpc_cred *cred = NULL, *entry, *new; - int nr = 0; + unsigned int nr; - if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) - nr = acred->uid & RPC_CREDCACHE_MASK; + nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS); rcu_read_lock(); hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { @@ -356,7 +356,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) put_group_info(acred.group_info); return ret; } -EXPORT_SYMBOL_GPL(rpcauth_lookupcred); void rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, @@ -375,41 +374,58 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, } EXPORT_SYMBOL_GPL(rpcauth_init_cred); -struct rpc_cred * -rpcauth_bindcred(struct rpc_task *task) +void +rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +{ + task->tk_msg.rpc_cred = get_rpccred(cred); + dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, + cred->cr_auth->au_ops->au_name, cred); +} +EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); + +static void +rpcauth_bind_root_cred(struct rpc_task *task) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { - .uid = current->fsuid, - .gid = current->fsgid, - .group_info = current->group_info, + .uid = 0, + .gid = 0, }; struct rpc_cred *ret; - int flags = 0; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - get_group_info(acred.group_info); - if (task->tk_flags & RPC_TASK_ROOTCREDS) - flags |= RPCAUTH_LOOKUP_ROOTCREDS; - ret = auth->au_ops->lookup_cred(auth, &acred, flags); + ret = auth->au_ops->lookup_cred(auth, &acred, 0); + if (!IS_ERR(ret)) + task->tk_msg.rpc_cred = ret; + else + task->tk_status = PTR_ERR(ret); +} + +static void +rpcauth_bind_new_cred(struct rpc_task *task) +{ + struct rpc_auth *auth = task->tk_client->cl_auth; + struct rpc_cred *ret; + + dprintk("RPC: %5u looking up %s cred\n", + task->tk_pid, auth->au_ops->au_name); + ret = rpcauth_lookupcred(auth, 0); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else task->tk_status = PTR_ERR(ret); - put_group_info(acred.group_info); - return ret; } void -rpcauth_holdcred(struct rpc_task *task) +rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; - if (cred != NULL) { - get_rpccred(cred); - dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, - cred->cr_auth->au_ops->au_name, cred); - } + if (cred != NULL) + cred->cr_ops->crbind(task, cred); + else if (flags & RPC_TASK_ROOTCREDS) + rpcauth_bind_root_cred(task); + else + rpcauth_bind_new_cred(task); } void @@ -550,6 +566,7 @@ static struct shrinker rpc_cred_shrinker = { void __init rpcauth_init_module(void) { rpc_init_authunix(); + rpc_init_generic_auth(); register_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c new file mode 100644 index 000000000000..6a3f77c9e4d3 --- /dev/null +++ b/net/sunrpc/auth_generic.c @@ -0,0 +1,157 @@ +/* + * Generic RPC credential + * + * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com> + */ + +#include <linux/err.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/debug.h> +#include <linux/sunrpc/sched.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +struct generic_cred { + struct rpc_cred gc_base; + struct auth_cred acred; +}; + +static struct rpc_auth generic_auth; +static struct rpc_cred_cache generic_cred_cache; +static const struct rpc_credops generic_credops; + +/* + * Public call interface + */ +struct rpc_cred *rpc_lookup_cred(void) +{ + return rpcauth_lookupcred(&generic_auth, 0); +} +EXPORT_SYMBOL_GPL(rpc_lookup_cred); + +static void +generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +{ + struct rpc_auth *auth = task->tk_client->cl_auth; + struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; + struct rpc_cred *ret; + + ret = auth->au_ops->lookup_cred(auth, acred, 0); + if (!IS_ERR(ret)) + task->tk_msg.rpc_cred = ret; + else + task->tk_status = PTR_ERR(ret); +} + +/* + * Lookup generic creds for current process + */ +static struct rpc_cred * +generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + return rpcauth_lookup_credcache(&generic_auth, acred, flags); +} + +static struct rpc_cred * +generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + struct generic_cred *gcred; + + gcred = kmalloc(sizeof(*gcred), GFP_KERNEL); + if (gcred == NULL) + return ERR_PTR(-ENOMEM); + + rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops); + gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + + gcred->acred.uid = acred->uid; + gcred->acred.gid = acred->gid; + gcred->acred.group_info = acred->group_info; + if (gcred->acred.group_info != NULL) + get_group_info(gcred->acred.group_info); + + dprintk("RPC: allocated generic cred %p for uid %d gid %d\n", + gcred, acred->uid, acred->gid); + return &gcred->gc_base; +} + +static void +generic_free_cred(struct rpc_cred *cred) +{ + struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + + dprintk("RPC: generic_free_cred %p\n", gcred); + if (gcred->acred.group_info != NULL) + put_group_info(gcred->acred.group_info); + kfree(gcred); +} + +static void +generic_free_cred_callback(struct rcu_head *head) +{ + struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu); + generic_free_cred(cred); +} + +static void +generic_destroy_cred(struct rpc_cred *cred) +{ + call_rcu(&cred->cr_rcu, generic_free_cred_callback); +} + +/* + * Match credentials against current process creds. + */ +static int +generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) +{ + struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + + if (gcred->acred.uid != acred->uid || + gcred->acred.gid != acred->gid || + gcred->acred.group_info != acred->group_info) + return 0; + return 1; +} + +void __init rpc_init_generic_auth(void) +{ + spin_lock_init(&generic_cred_cache.lock); +} + +void __exit rpc_destroy_generic_auth(void) +{ + rpcauth_clear_credcache(&generic_cred_cache); +} + +static struct rpc_cred_cache generic_cred_cache = { + {{ NULL, },}, +}; + +static const struct rpc_authops generic_auth_ops = { + .owner = THIS_MODULE, +#ifdef RPC_DEBUG + .au_name = "Generic", +#endif + .lookup_cred = generic_lookup_cred, + .crcreate = generic_create_cred, +}; + +static struct rpc_auth generic_auth = { + .au_ops = &generic_auth_ops, + .au_count = ATOMIC_INIT(0), + .au_credcache = &generic_cred_cache, +}; + +static const struct rpc_credops generic_credops = { + .cr_name = "Generic cred", + .crdestroy = generic_destroy_cred, + .crbind = generic_bind_cred, + .crmatch = generic_match, +}; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6dac38792288..d34f6dfc7516 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -266,6 +266,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); + rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); kfree(gss_msg); } @@ -408,13 +409,13 @@ gss_refresh_upcall(struct rpc_task *task) } spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) - rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); + rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ atomic_inc(&gss_msg->count); - rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); + rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); } else err = gss_msg->msg.errno; spin_unlock(&inode->i_lock); @@ -1299,6 +1300,7 @@ static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, + .crbind = rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh, @@ -1310,6 +1312,7 @@ static const struct rpc_credops gss_credops = { static const struct rpc_credops gss_nullops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh_null, diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 537d0e8589dd..3c26c18df0de 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -125,6 +125,7 @@ static const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = nul_match, .crmarshal = nul_marshal, .crrefresh = nul_refresh, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 5ed91e5bcee4..04e936a56fb2 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -60,7 +60,8 @@ static struct rpc_cred * unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct unx_cred *cred; - int i; + unsigned int groups = 0; + unsigned int i; dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); @@ -70,21 +71,17 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; - if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { - cred->uc_uid = 0; - cred->uc_gid = 0; - cred->uc_gids[0] = NOGROUP; - } else { - int groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; - - cred->uc_gid = acred->gid; - for (i = 0; i < groups; i++) - cred->uc_gids[i] = GROUP_AT(acred->group_info, i); - if (i < NFS_NGROUPS) - cred->uc_gids[i] = NOGROUP; - } + + if (acred->group_info != NULL) + groups = acred->group_info->ngroups; + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + + cred->uc_gid = acred->gid; + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + if (i < NFS_NGROUPS) + cred->uc_gids[i] = NOGROUP; return &cred->uc_base; } @@ -118,26 +115,21 @@ static int unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) { struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); - int i; + unsigned int groups = 0; + unsigned int i; - if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { - int groups; - if (cred->uc_uid != acred->uid - || cred->uc_gid != acred->gid) - return 0; + if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid) + return 0; + if (acred->group_info != NULL) groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; - for (i = 0; i < groups ; i++) - if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) - return 0; - return 1; - } - return (cred->uc_uid == 0 - && cred->uc_gid == 0 - && cred->uc_gids[0] == (gid_t) NOGROUP); + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + for (i = 0; i < groups ; i++) + if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) + return 0; + return 1; } /* @@ -245,6 +237,7 @@ static const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = unx_match, .crmarshal = unx_marshal, .crrefresh = unx_refresh, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8c6a7f1a25e9..ea14314331b0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -548,7 +548,7 @@ EXPORT_SYMBOL_GPL(rpc_run_task); * @msg: RPC call parameters * @flags: RPC call flags */ -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags) { struct rpc_task *task; struct rpc_task_setup task_setup_data = { @@ -579,7 +579,7 @@ EXPORT_SYMBOL_GPL(rpc_call_sync); * @data: user call data */ int -rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, +rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; @@ -1066,7 +1066,7 @@ call_transmit(struct rpc_task *task) if (task->tk_msg.rpc_proc->p_decode != NULL) return; task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&task->tk_xprt->pending, task); } /* @@ -1535,7 +1535,7 @@ void rpc_show_tasks(void) proc = -1; if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + rpc_waitq = rpc_qname(t->tk_waitqueue); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", t->tk_pid, proc, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3164a0871cf0..f480c718b400 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -298,7 +298,7 @@ void rpcb_getport_async(struct rpc_task *task) /* Put self on queue before sending rpcbind request, in case * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL, NULL); + rpc_sleep_on(&xprt->binding, task, NULL); /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4c669121e607..6eab9bf94baf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -38,9 +38,9 @@ static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; -static void __rpc_default_timer(struct rpc_task *task); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); +static void __rpc_queue_timer_fn(unsigned long ptr); /* * RPC tasks sit here while waiting for conditions to improve. @@ -57,41 +57,30 @@ struct workqueue_struct *rpciod_workqueue; * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ -static inline void -__rpc_disable_timer(struct rpc_task *task) +static void +__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { + if (task->tk_timeout == 0) + return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); - task->tk_timeout_fn = NULL; task->tk_timeout = 0; + list_del(&task->u.tk_wait.timer_list); + if (list_empty(&queue->timer_list.list)) + del_timer(&queue->timer_list.timer); } -/* - * Run a timeout function. - * We use the callback in order to allow __rpc_wake_up_task() - * and friends to disable the timer synchronously on SMP systems - * without calling del_timer_sync(). The latter could cause a - * deadlock if called while we're holding spinlocks... - */ -static void rpc_run_timer(struct rpc_task *task) +static void +rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) { - void (*callback)(struct rpc_task *); - - callback = task->tk_timeout_fn; - task->tk_timeout_fn = NULL; - if (callback && RPC_IS_QUEUED(task)) { - dprintk("RPC: %5u running timer\n", task->tk_pid); - callback(task); - } - smp_mb__before_clear_bit(); - clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - smp_mb__after_clear_bit(); + queue->timer_list.expires = expires; + mod_timer(&queue->timer_list.timer, expires); } /* * Set up a timer for the current task. */ -static inline void -__rpc_add_timer(struct rpc_task *task, rpc_action timer) +static void +__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (!task->tk_timeout) return; @@ -99,27 +88,10 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer) dprintk("RPC: %5u setting alarm for %lu ms\n", task->tk_pid, task->tk_timeout * 1000 / HZ); - if (timer) - task->tk_timeout_fn = timer; - else - task->tk_timeout_fn = __rpc_default_timer; - set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - mod_timer(&task->tk_timer, jiffies + task->tk_timeout); -} - -/* - * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding queue->lock. - */ -static void -rpc_delete_timer(struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task)) - return; - if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { - del_singleshot_timer_sync(&task->tk_timer); - dprintk("RPC: %5u deleting timer\n", task->tk_pid); - } + task->u.tk_wait.expires = jiffies + task->tk_timeout; + if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) + rpc_set_queue_timer(queue, task->u.tk_wait.expires); + list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } /* @@ -161,7 +133,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task * list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); - task->u.tk_wait.rpc_waitq = queue; + task->tk_waitqueue = queue; queue->qlen++; rpc_set_queued(task); @@ -181,22 +153,18 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->u.tk_wait.list); } /* * Remove request from queue. * Note: must be called with spin lock held. */ -static void __rpc_remove_wait_queue(struct rpc_task *task) +static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - struct rpc_wait_queue *queue; - queue = task->u.tk_wait.rpc_waitq; - + __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - else - list_del(&task->u.tk_wait.list); + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -229,6 +197,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c INIT_LIST_HEAD(&queue->tasks[i]); queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); + queue->qlen = 0; + setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); + INIT_LIST_HEAD(&queue->timer_list.list); #ifdef RPC_DEBUG queue->name = qname; #endif @@ -245,6 +216,12 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) } EXPORT_SYMBOL_GPL(rpc_init_wait_queue); +void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) +{ + del_timer_sync(&queue->timer_list.timer); +} +EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); + static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) @@ -313,7 +290,6 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); */ static void rpc_make_runnable(struct rpc_task *task) { - BUG_ON(task->tk_timeout_fn); rpc_clear_queued(task); if (rpc_test_and_set_running(task)) return; @@ -326,7 +302,7 @@ static void rpc_make_runnable(struct rpc_task *task) int status; INIT_WORK(&task->u.tk_work, rpc_async_schedule); - status = queue_work(task->tk_workqueue, &task->u.tk_work); + status = queue_work(rpciod_workqueue, &task->u.tk_work); if (status < 0) { printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); task->tk_status = status; @@ -343,7 +319,7 @@ static void rpc_make_runnable(struct rpc_task *task) * as it's on a wait queue. */ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -357,11 +333,11 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, BUG_ON(task->tk_callback != NULL); task->tk_callback = action; - __rpc_add_timer(task, timer); + __rpc_add_timer(q, task); } void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { /* Mark the task as being activated if so needed */ rpc_set_active(task); @@ -370,18 +346,19 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action, timer); + __rpc_sleep_on(q, task, action); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); /** * __rpc_do_wake_up_task - wake up a single rpc_task + * @queue: wait queue * @task: task to be woken up * * Caller must hold queue->lock, and have cleared the task queued flag. */ -static void __rpc_do_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task *task) { dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); @@ -395,8 +372,7 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) return; } - __rpc_disable_timer(task); - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -404,48 +380,32 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) } /* - * Wake up the specified task + * Wake up a queued task while the queue lock is being held */ -static void __rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) - __rpc_do_wake_up_task(task); - rpc_finish_wakeup(task); - } + if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); } /* - * Default timeout handler if none specified by user + * Wake up a task on a specific queue */ -static void -__rpc_default_timer(struct rpc_task *task) +void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid); - task->tk_status = -ETIMEDOUT; - rpc_wake_up_task(task); + spin_lock_bh(&queue->lock); + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock_bh(&queue->lock); } +EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the specified task */ -void rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task(struct rpc_task *task) { - rcu_read_lock_bh(); - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) { - struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; - - /* Note: we're already in a bh-safe context */ - spin_lock(&queue->lock); - __rpc_do_wake_up_task(task); - spin_unlock(&queue->lock); - } - rpc_finish_wakeup(task); - } - rcu_read_unlock_bh(); + rpc_wake_up_queued_task(task->tk_waitqueue, task); } -EXPORT_SYMBOL_GPL(rpc_wake_up_task); /* * Wake up the next task on a priority queue. @@ -495,7 +455,7 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); return task; } @@ -508,16 +468,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); return task; } @@ -534,18 +492,16 @@ void rpc_wake_up(struct rpc_wait_queue *queue) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); if (head == &queue->tasks[0]) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } EXPORT_SYMBOL_GPL(rpc_wake_up); @@ -561,26 +517,48 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) { task->tk_status = status; - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } if (head == &queue->tasks[0]) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } EXPORT_SYMBOL_GPL(rpc_wake_up_status); +static void __rpc_queue_timer_fn(unsigned long ptr) +{ + struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; + struct rpc_task *task, *n; + unsigned long expires, now, timeo; + + spin_lock(&queue->lock); + expires = now = jiffies; + list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { + timeo = task->u.tk_wait.expires; + if (time_after_eq(now, timeo)) { + dprintk("RPC: %5u timeout\n", task->tk_pid); + task->tk_status = -ETIMEDOUT; + rpc_wake_up_task_queue_locked(queue, task); + continue; + } + if (expires == now || time_after(expires, timeo)) + expires = timeo; + } + if (!list_empty(&queue->timer_list.list)) + rpc_set_queue_timer(queue, expires); + spin_unlock(&queue->lock); +} + static void __rpc_atrun(struct rpc_task *task) { - rpc_wake_up_task(task); + task->tk_status = 0; } /* @@ -589,7 +567,7 @@ static void __rpc_atrun(struct rpc_task *task) void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; - rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); + rpc_sleep_on(&delay_queue, task, __rpc_atrun); } EXPORT_SYMBOL_GPL(rpc_delay); @@ -644,10 +622,6 @@ static void __rpc_execute(struct rpc_task *task) BUG_ON(RPC_IS_QUEUED(task)); for (;;) { - /* - * Garbage collection of pending timers... - */ - rpc_delete_timer(task); /* * Execute any pending callback. @@ -816,8 +790,6 @@ EXPORT_SYMBOL_GPL(rpc_free); static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data) { memset(task, 0, sizeof(*task)); - setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer, - (unsigned long)task); atomic_set(&task->tk_count, 1); task->tk_flags = task_setup_data->flags; task->tk_ops = task_setup_data->callback_ops; @@ -832,7 +804,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_owner = current->tgid; /* Initialize workqueue for async tasks */ - task->tk_workqueue = rpciod_workqueue; + task->tk_workqueue = task_setup_data->workqueue; task->tk_client = task_setup_data->rpc_client; if (task->tk_client != NULL) { @@ -845,12 +817,11 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_action = rpc_prepare_task; if (task_setup_data->rpc_message != NULL) { - memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg)); + task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc; + task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp; + task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp; /* Bind the user cred */ - if (task->tk_msg.rpc_cred != NULL) - rpcauth_holdcred(task); - else - rpcauth_bindcred(task); + rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags); if (task->tk_action == NULL) rpc_call_start(task); } @@ -868,13 +839,6 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void rpc_free_task(struct rcu_head *rcu) -{ - struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); - dprintk("RPC: %5u freeing task\n", task->tk_pid); - mempool_free(task, rpc_task_mempool); -} - /* * Create a new task for the specified client. */ @@ -898,12 +862,25 @@ out: return task; } - -void rpc_put_task(struct rpc_task *task) +static void rpc_free_task(struct rpc_task *task) { const struct rpc_call_ops *tk_ops = task->tk_ops; void *calldata = task->tk_calldata; + if (task->tk_flags & RPC_TASK_DYNAMIC) { + dprintk("RPC: %5u freeing task\n", task->tk_pid); + mempool_free(task, rpc_task_mempool); + } + rpc_release_calldata(tk_ops, calldata); +} + +static void rpc_async_release(struct work_struct *work) +{ + rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); +} + +void rpc_put_task(struct rpc_task *task) +{ if (!atomic_dec_and_test(&task->tk_count)) return; /* Release resources */ @@ -915,9 +892,11 @@ void rpc_put_task(struct rpc_task *task) rpc_release_client(task->tk_client); task->tk_client = NULL; } - if (task->tk_flags & RPC_TASK_DYNAMIC) - call_rcu_bh(&task->u.tk_rcu, rpc_free_task); - rpc_release_calldata(tk_ops, calldata); + if (task->tk_workqueue != NULL) { + INIT_WORK(&task->u.tk_work, rpc_async_release); + queue_work(task->tk_workqueue, &task->u.tk_work); + } else + rpc_free_task(task); } EXPORT_SYMBOL_GPL(rpc_put_task); @@ -937,9 +916,6 @@ static void rpc_release_task(struct rpc_task *task) } BUG_ON (RPC_IS_QUEUED(task)); - /* Synchronously delete any running timer */ - rpc_delete_timer(task); - #ifdef RPC_DEBUG task->tk_magic = 0; #endif @@ -1029,11 +1005,20 @@ rpc_destroy_mempool(void) kmem_cache_destroy(rpc_task_slabp); if (rpc_buffer_slabp) kmem_cache_destroy(rpc_buffer_slabp); + rpc_destroy_wait_queue(&delay_queue); } int rpc_init_mempool(void) { + /* + * The following is not strictly a mempool initialisation, + * but there is no harm in doing it here + */ + rpc_init_wait_queue(&delay_queue, "delayq"); + if (!rpciod_start()) + goto err_nomem; + rpc_task_slabp = kmem_cache_create("rpc_tasks", sizeof(struct rpc_task), 0, SLAB_HWCACHE_ALIGN, @@ -1054,13 +1039,6 @@ rpc_init_mempool(void) rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; - if (!rpciod_start()) - goto err_nomem; - /* - * The following is not strictly a mempool initialisation, - * but there is no harm in doing it here - */ - rpc_init_wait_queue(&delay_queue, "delayq"); return 0; err_nomem: rpc_destroy_mempool(); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d5553b8179f9..85199c647022 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -188,9 +188,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -238,9 +238,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -453,7 +453,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&xprt->pending, task, NULL, NULL); + rpc_sleep_on(&xprt->pending, task, NULL); } EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); @@ -472,7 +472,7 @@ void xprt_write_space(struct rpc_xprt *xprt) if (xprt->snd_task) { dprintk("RPC: write space: waking waiting task on " "xprt %p\n", xprt); - rpc_wake_up_task(xprt->snd_task); + rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task); } spin_unlock_bh(&xprt->transport_lock); } @@ -602,8 +602,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - else if (xprt->snd_task != NULL) - rpc_wake_up_task(xprt->snd_task); + xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_force_disconnect); @@ -653,7 +652,7 @@ void xprt_connect(struct rpc_task *task) task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = xprt->connect_timeout; - rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); + rpc_sleep_on(&xprt->pending, task, xprt_connect_status); xprt->stat.connect_start = jiffies; xprt->ops->connect(task); } @@ -749,18 +748,19 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt); void xprt_complete_rqst(struct rpc_task *task, int copied) { struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", task->tk_pid, ntohl(req->rq_xid), copied); - task->tk_xprt->stat.recvs++; + xprt->stat.recvs++; task->tk_rtt = (long)jiffies - req->rq_xtime; list_del_init(&req->rq_list); /* Ensure all writes are done before we update req->rq_received */ smp_wmb(); req->rq_received = req->rq_private_buf.len = copied; - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); @@ -769,17 +769,17 @@ static void xprt_timer(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + if (task->tk_status != -ETIMEDOUT) + return; dprintk("RPC: %5u xprt_timer\n", task->tk_pid); - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); if (!req->rq_received) { if (xprt->ops->timer) xprt->ops->timer(task); - task->tk_status = -ETIMEDOUT; - } - task->tk_timeout = 0; - rpc_wake_up_task(task); - spin_unlock(&xprt->transport_lock); + } else + task->tk_status = 0; + spin_unlock_bh(&xprt->transport_lock); } /** @@ -864,7 +864,7 @@ void xprt_transmit(struct rpc_task *task) if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); + rpc_sleep_on(&xprt->pending, task, xprt_timer); spin_unlock_bh(&xprt->transport_lock); return; } @@ -875,7 +875,7 @@ void xprt_transmit(struct rpc_task *task) */ task->tk_status = status; if (status == -ECONNREFUSED) - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); } static inline void do_xprt_reserve(struct rpc_task *task) @@ -895,7 +895,7 @@ static inline void do_xprt_reserve(struct rpc_task *task) dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; task->tk_timeout = 0; - rpc_sleep_on(&xprt->backlog, task, NULL, NULL); + rpc_sleep_on(&xprt->backlog, task, NULL); } /** @@ -1052,6 +1052,11 @@ static void xprt_destroy(struct kref *kref) xprt->shutdown = 1; del_timer_sync(&xprt->timer); + rpc_destroy_wait_queue(&xprt->binding); + rpc_destroy_wait_queue(&xprt->pending); + rpc_destroy_wait_queue(&xprt->sending); + rpc_destroy_wait_queue(&xprt->resend); + rpc_destroy_wait_queue(&xprt->backlog); /* * Tear down transport state and free the rpc_xprt */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 30e7ac243a90..8bd3b0f73ac0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1073,6 +1073,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) { struct rpc_xprt *xprt; read_descriptor_t rd_desc; + int read; dprintk("RPC: xs_tcp_data_ready...\n"); @@ -1084,8 +1085,10 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ rd_desc.arg.data = xprt; - rd_desc.count = 65536; - tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + do { + rd_desc.count = 65536; + read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + } while (read > 0); out: read_unlock(&sk->sk_callback_lock); } |