From ec482cc1c1180c65da582e17f4e79d204bb338e0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:23:44 -0500 Subject: xprtrdma: Fix sparse warnings linux/net/sunrpc/xprtrdma/rpc_rdma.c:375:63: warning: incorrect type in argument 5 (different base types) linux/net/sunrpc/xprtrdma/rpc_rdma.c:375:63: expected unsigned int [usertype] xid linux/net/sunrpc/xprtrdma/rpc_rdma.c:375:63: got restricted __be32 [usertype] rq_xid linux/net/sunrpc/xprtrdma/rpc_rdma.c:432:62: warning: incorrect type in argument 5 (different base types) linux/net/sunrpc/xprtrdma/rpc_rdma.c:432:62: expected unsigned int [usertype] xid linux/net/sunrpc/xprtrdma/rpc_rdma.c:432:62: got restricted __be32 [usertype] rq_xid linux/net/sunrpc/xprtrdma/rpc_rdma.c:489:62: warning: incorrect type in argument 5 (different base types) linux/net/sunrpc/xprtrdma/rpc_rdma.c:489:62: expected unsigned int [usertype] xid linux/net/sunrpc/xprtrdma/rpc_rdma.c:489:62: got restricted __be32 [usertype] rq_xid Fixes: 0a93fbcb16e6 ("xprtrdma: Plant XID in on-the-wire RDMA ... ") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 4 ++-- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 6a561056b538..52cb6c1b0c2b 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -391,7 +391,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) */ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, u32 xid, + int nsegs, bool writing, __be32 xid, struct rpcrdma_mr **out) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; @@ -446,7 +446,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, goto out_mapmr_err; ibmr->iova &= 0x00000000ffffffff; - ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32; + ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32; key = (u8)(ibmr->rkey & 0x000000FF); ib_update_fast_reg_key(ibmr, ++key); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5a18472f2c9c..33db208fa9a8 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -577,7 +577,7 @@ void frwr_release_mr(struct rpcrdma_mr *mr); size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, u32 xid, + int nsegs, bool writing, __be32 xid, struct rpcrdma_mr **mr); int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); -- cgit v1.2.3 From d4550bbee66f4ba5a5e9bbe8055006332ebfc58b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:23:49 -0500 Subject: xprtrdma: Check inline size before providing a Write chunk In very rare cases, an NFS READ operation might predict that the non-payload part of the RPC Call is large. For instance, an NFSv4 COMPOUND with a large GETATTR result, in combination with a large Kerberos credential, could push the non-payload part to be several kilobytes. If the non-payload part is larger than the connection's inline threshold, the client is required to provision a Reply chunk. The current Linux client does not check for this case. There are two obvious ways to handle it: a. Provision a Write chunk for the payload and a Reply chunk for the non-payload part b. Provision a Reply chunk for the whole RPC Reply Some testing at a recent NFS bake-a-thon showed that servers can mostly handle a. but there are some corner cases that do not work yet. b. already works (it has to, to handle krb5i/p), but could be somewhat less efficient. However, I expect this scenario to be very rare -- no-one has reported a problem yet. So I'm going to implement b. Sometime later I will provide some patches to help make b. a little more efficient by more carefully choosing the Reply chunk's segment sizes to ensure the payload is optimally aligned. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d18614e02b4e..7774aee7c013 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -164,6 +164,21 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; } +/* The client is required to provide a Reply chunk if the maximum + * size of the non-payload part of the RPC Reply is larger than + * the inline threshold. + */ +static bool +rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt, + const struct rpc_rqst *rqst) +{ + const struct xdr_buf *buf = &rqst->rq_rcv_buf; + const struct rpcrdma_ia *ia = &r_xprt->rx_ia; + + return buf->head[0].iov_len + buf->tail[0].iov_len < + ia->ri_max_inline_read; +} + /* Split @vec on page boundaries into SGEs. FMR registers pages, not * a byte range. Other modes coalesce these SGEs into a single MR * when they can. @@ -762,7 +777,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) */ if (rpcrdma_results_inline(r_xprt, rqst)) wtype = rpcrdma_noch; - else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) + else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) && + rpcrdma_nonpayload_inline(r_xprt, rqst)) wtype = rpcrdma_writech; else wtype = rpcrdma_replych; -- cgit v1.2.3 From e340c2d6ef2a8cdcc11672f8cc839c30ad360e52 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:23:54 -0500 Subject: xprtrdma: Reduce the doorbell rate (Receive) Post RECV WRs in batches to reduce the hardware doorbell rate per transport. This helps the RPC-over-RDMA client scale better in number of transports. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 2 ++ net/sunrpc/xprtrdma/xprt_rdma.h | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4994e75945b8..b4e997d53ec7 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1480,6 +1480,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (ep->rep_receive_count > needed) goto out; needed -= ep->rep_receive_count; + if (!temp) + needed += RPCRDMA_MAX_RECV_BATCH; count = 0; wr = NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 33db208fa9a8..10f6593e1a6a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -205,6 +205,16 @@ struct rpcrdma_rep { struct ib_recv_wr rr_recv_wr; }; +/* To reduce the rate at which a transport invokes ib_post_recv + * (and thus the hardware doorbell rate), xprtrdma posts Receive + * WRs in batches. + * + * Setting this to zero disables Receive post batching. + */ +enum { + RPCRDMA_MAX_RECV_BATCH = 7, +}; + /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes */ struct rpcrdma_req; -- cgit v1.2.3 From 0ccc61b1c76e5163c6fea6cf83bd18e7ea244c5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:05 -0500 Subject: SUNRPC: Add xdr_stream::rqst field Having access to the controlling rpc_rqst means a trace point in the XDR code can report: - the XID - the task ID and client ID - the p_name of RPC being processed Subsequent patches will introduce such trace points. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 5 +++-- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- include/linux/sunrpc/xdr.h | 8 ++++++-- net/sunrpc/auth.c | 4 ++-- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/xdr.c | 12 +++++++++--- net/sunrpc/xprtrdma/backchannel.c | 2 +- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- 8 files changed, 26 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a87a56273407..bc7c1766a2be 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -943,10 +943,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) }; unsigned int nops = 0; - xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); + xdr_init_decode(&xdr_in, &rqstp->rq_arg, + rqstp->rq_arg.head[0].iov_base, NULL); p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - xdr_init_encode(&xdr_out, &rqstp->rq_res, p); + xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL); status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); if (status == htonl(NFS4ERR_RESOURCE)) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 63abe705f4ca..32701b6a9566 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2036,7 +2036,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr, dprintk("%s: Begin\n", __func__); - xdr_init_encode(&tmp_xdr, &tmp_buf, NULL); + xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL); ff_layout_encode_ioerr(&tmp_xdr, args, ff_args); ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2ec128060239..787939d13643 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -217,6 +217,8 @@ struct xdr_stream { struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ unsigned int nwords; /* Remaining decode buffer length */ + + struct rpc_rqst *rqst; /* For debugging */ }; /* @@ -227,7 +229,8 @@ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *obj); -extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, + __be32 *p, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); @@ -235,7 +238,8 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); -extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, + __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f3023bbc0b7f..8dfab6119e6a 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -798,7 +798,7 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, { struct xdr_stream xdr; - xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data); + xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data, rqstp); encode(rqstp, &xdr, obj); } @@ -823,7 +823,7 @@ rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, { struct xdr_stream xdr; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data); + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data, rqstp); return decode(rqstp, &xdr, obj); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1531b0219344..a42672e81792 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1722,7 +1722,7 @@ static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, { struct xdr_stream xdr; - xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p); + xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p, rqstp); encode(rqstp, &xdr, obj); } @@ -1998,7 +1998,7 @@ gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, { struct xdr_stream xdr; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p, rqstp); return decode(rqstp, &xdr, obj); } diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index f302c6eb8779..345f08b634ee 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -483,6 +483,7 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos); * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer in which to encode data * @p: current pointer inside XDR buffer + * @rqst: pointer to controlling rpc_rqst, for debugging * * Note: at the moment the RPC client only passes the length of our * scratch buffer in the xdr_buf's header kvec. Previously this @@ -491,7 +492,8 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos); * of the buffer length, and takes care of adjusting the kvec * length for us. */ -void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) +void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, + struct rpc_rqst *rqst) { struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; @@ -513,6 +515,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) buf->len += len; iov->iov_len += len; } + xdr->rqst = rqst; } EXPORT_SYMBOL_GPL(xdr_init_encode); @@ -819,8 +822,10 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer from which to decode data * @p: current pointer inside XDR buffer + * @rqst: pointer to controlling rpc_rqst, for debugging */ -void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) +void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, + struct rpc_rqst *rqst) { xdr->buf = buf; xdr->scratch.iov_base = NULL; @@ -836,6 +841,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) xdr->nwords -= p - xdr->p; xdr->p = p; } + xdr->rqst = rqst; } EXPORT_SYMBOL_GPL(xdr_init_decode); @@ -854,7 +860,7 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, buf->page_len = len; buf->buflen = len; buf->len = len; - xdr_init_decode(xdr, buf, NULL); + xdr_init_decode(xdr, buf, NULL, NULL); } EXPORT_SYMBOL_GPL(xdr_init_decode_pages); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 0de9b3e63770..98c1e43eb7b1 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf, - req->rl_rdmabuf->rg_base); + req->rl_rdmabuf->rg_base, rqst); p = xdr_reserve_space(&req->rl_stream, 28); if (unlikely(!p)) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 7774aee7c013..6c1fb270f127 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -748,7 +748,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(xdr, &req->rl_hdrbuf, - req->rl_rdmabuf->rg_base); + req->rl_rdmabuf->rg_base, rqst); /* Fixed header fields */ ret = -EMSGSIZE; @@ -1329,7 +1329,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) /* Fixed transport header fields */ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, - rep->rr_hdrbuf.head[0].iov_base); + rep->rr_hdrbuf.head[0].iov_base, NULL); p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); if (unlikely(!p)) goto out_shortreply; -- cgit v1.2.3 From 5582863f450ca44c472dbf4812a62f4e0e68b0c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:10 -0500 Subject: SUNRPC: Add XDR overflow trace event This can help field troubleshooting without needing the overhead of a full network capture (ie, tcpdump). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 67 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xdr.c | 24 +++++++++++----- 2 files changed, 84 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f88b0f52aa7e..fbc41b8142d3 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -254,6 +254,73 @@ TRACE_EVENT(rpc_stats_latency, __entry->backlog, __entry->rtt, __entry->execute) ); +TRACE_EVENT(rpc_xdr_overflow, + TP_PROTO( + const struct xdr_stream *xdr, + size_t requested + ), + + TP_ARGS(xdr, requested), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, version) + __field(size_t, requested) + __field(const void *, end) + __field(const void *, p) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + __field(unsigned int, len) + __string(progname, + xdr->rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) + ), + + TP_fast_assign( + if (xdr->rqst) { + const struct rpc_task *task = xdr->rqst->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __assign_str(progname, + task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + } else { + __entry->task_id = 0; + __entry->client_id = 0; + __assign_str(progname, "unknown") + __entry->version = 0; + __assign_str(procedure, "unknown") + } + __entry->requested = requested; + __entry->end = xdr->end; + __entry->p = xdr->p; + __entry->head_base = xdr->buf->head[0].iov_base, + __entry->head_len = xdr->buf->head[0].iov_len, + __entry->page_len = xdr->buf->page_len, + __entry->tail_base = xdr->buf->tail[0].iov_base, + __entry->tail_len = xdr->buf->tail[0].iov_len, + __entry->len = xdr->buf->len; + ), + + TP_printk( + "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + __entry->task_id, __entry->client_id, + __get_str(progname), __entry->version, __get_str(procedure), + __entry->requested, __entry->p, __entry->end, + __entry->head_base, __entry->head_len, + __entry->page_len, + __entry->tail_base, __entry->tail_len, + __entry->len + ) +); + /* * First define the enums in the below macros to be exported to userspace * via TRACE_DEFINE_ENUM(). diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 345f08b634ee..6d0b615a02ae 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * XDR functions for basic NFS types @@ -554,9 +555,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, int frag1bytes, frag2bytes; if (nbytes > PAGE_SIZE) - return NULL; /* Bigger buffers require special handling */ + goto out_overflow; /* Bigger buffers require special handling */ if (xdr->buf->len + nbytes > xdr->buf->buflen) - return NULL; /* Sorry, we're totally out of space */ + goto out_overflow; /* Sorry, we're totally out of space */ frag1bytes = (xdr->end - xdr->p) << 2; frag2bytes = nbytes - frag1bytes; if (xdr->iov) @@ -585,6 +586,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, xdr->buf->page_len += frag2bytes; xdr->buf->len += nbytes; return p; +out_overflow: + trace_rpc_xdr_overflow(xdr, nbytes); + return NULL; } /** @@ -902,20 +906,23 @@ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) size_t cplen = (char *)xdr->end - (char *)xdr->p; if (nbytes > xdr->scratch.iov_len) - return NULL; + goto out_overflow; p = __xdr_inline_decode(xdr, cplen); if (p == NULL) return NULL; memcpy(cpdest, p, cplen); + if (!xdr_set_next_buffer(xdr)) + goto out_overflow; cpdest += cplen; nbytes -= cplen; - if (!xdr_set_next_buffer(xdr)) - return NULL; p = __xdr_inline_decode(xdr, nbytes); if (p == NULL) return NULL; memcpy(cpdest, p, nbytes); return xdr->scratch.iov_base; +out_overflow: + trace_rpc_xdr_overflow(xdr, nbytes); + return NULL; } /** @@ -932,14 +939,17 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; - if (nbytes == 0) + if (unlikely(nbytes == 0)) return xdr->p; if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) - return NULL; + goto out_overflow; p = __xdr_inline_decode(xdr, nbytes); if (p != NULL) return p; return xdr_copy_to_scratch(xdr, nbytes); +out_overflow: + trace_rpc_xdr_overflow(xdr, nbytes); + return NULL; } EXPORT_SYMBOL_GPL(xdr_inline_decode); -- cgit v1.2.3 From 7be9cea3600ba24f45733f823cb4ecb5a40f5db7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:16 -0500 Subject: SUNRPC: Add trace event that reports reply page vector alignment We don't want READ payloads that are partially in the head iovec and in the page buffer because this requires pull-up, which can be expensive. The NFS/RPC client tries hard to predict the size of the head iovec so that the incoming READ data payload lands only in the page vector, but it doesn't always get it right. To help diagnose such problems, add a trace point in the logic that decodes READ-like operations that reports whether pull-up is being done. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 59 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xdr.c | 33 +++++++++++++++++++----- 2 files changed, 86 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index fbc41b8142d3..627650800676 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -321,6 +321,65 @@ TRACE_EVENT(rpc_xdr_overflow, ) ); +TRACE_EVENT(rpc_xdr_alignment, + TP_PROTO( + const struct xdr_stream *xdr, + size_t offset, + unsigned int copied + ), + + TP_ARGS(xdr, offset, copied), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, version) + __field(size_t, offset) + __field(unsigned int, copied) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + __field(unsigned int, len) + __string(progname, + xdr->rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) + ), + + TP_fast_assign( + const struct rpc_task *task = xdr->rqst->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __assign_str(progname, + task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + + __entry->offset = offset; + __entry->copied = copied; + __entry->head_base = xdr->buf->head[0].iov_base, + __entry->head_len = xdr->buf->head[0].iov_len, + __entry->page_len = xdr->buf->page_len, + __entry->tail_base = xdr->buf->tail[0].iov_base, + __entry->tail_len = xdr->buf->tail[0].iov_len, + __entry->len = xdr->buf->len; + ), + + TP_printk( + "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + __entry->task_id, __entry->client_id, + __get_str(progname), __entry->version, __get_str(procedure), + __entry->offset, __entry->copied, + __entry->head_base, __entry->head_len, + __entry->page_len, + __entry->tail_base, __entry->tail_len, + __entry->len + ) +); + /* * First define the enums in the below macros to be exported to userspace * via TRACE_DEFINE_ENUM(). diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6d0b615a02ae..5f0aa53fa4ae 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -347,13 +347,15 @@ EXPORT_SYMBOL_GPL(_copy_from_pages); * 'len' bytes. The extra data is not lost, but is instead * moved into the inlined pages and/or the tail. */ -static void +static unsigned int xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) { struct kvec *head, *tail; size_t copy, offs; unsigned int pglen = buf->page_len; + unsigned int result; + result = 0; tail = buf->tail; head = buf->head; @@ -367,6 +369,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) copy = tail->iov_len - len; memmove((char *)tail->iov_base + len, tail->iov_base, copy); + result += copy; } /* Copy from the inlined pages into the tail */ copy = len; @@ -377,11 +380,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) copy = 0; else if (copy > tail->iov_len - offs) copy = tail->iov_len - offs; - if (copy != 0) + if (copy != 0) { _copy_from_pages((char *)tail->iov_base + offs, buf->pages, buf->page_base + pglen + offs - len, copy); + result += copy; + } /* Do we also need to copy data from the head into the tail ? */ if (len > pglen) { offs = copy = len - pglen; @@ -391,6 +396,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) (char *)head->iov_base + head->iov_len - offs, copy); + result += copy; } } /* Now handle pages */ @@ -406,12 +412,15 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) _copy_to_pages(buf->pages, buf->page_base, (char *)head->iov_base + head->iov_len - len, copy); + result += copy; } head->iov_len -= len; buf->buflen -= len; /* Have we truncated the message? */ if (buf->len > buf->buflen) buf->len = buf->buflen; + + return result; } /** @@ -423,14 +432,16 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) * 'len' bytes. The extra data is not lost, but is instead * moved into the tail. */ -static void +static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) { struct kvec *tail; size_t copy; unsigned int pglen = buf->page_len; unsigned int tailbuf_len; + unsigned int result; + result = 0; tail = buf->tail; BUG_ON (len > pglen); @@ -448,18 +459,22 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) if (tail->iov_len > len) { char *p = (char *)tail->iov_base + len; memmove(p, tail->iov_base, tail->iov_len - len); + result += tail->iov_len - len; } else copy = tail->iov_len; /* Copy from the inlined pages into the tail */ _copy_from_pages((char *)tail->iov_base, buf->pages, buf->page_base + pglen - len, copy); + result += copy; } buf->page_len -= len; buf->buflen -= len; /* Have we truncated the message? */ if (buf->len > buf->buflen) buf->len = buf->buflen; + + return result; } void @@ -959,13 +974,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) struct kvec *iov; unsigned int nwords = XDR_QUADLEN(len); unsigned int cur = xdr_stream_pos(xdr); + unsigned int copied, offset; if (xdr->nwords == 0) return 0; + /* Realign pages to current pointer position */ - iov = buf->head; + iov = buf->head; if (iov->iov_len > cur) { - xdr_shrink_bufhead(buf, iov->iov_len - cur); + offset = iov->iov_len - cur; + copied = xdr_shrink_bufhead(buf, offset); + trace_rpc_xdr_alignment(xdr, offset, copied); xdr->nwords = XDR_QUADLEN(buf->len - cur); } @@ -977,7 +996,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) len = buf->page_len; else if (nwords < xdr->nwords) { /* Truncate page data and move it into the tail */ - xdr_shrink_pagelen(buf, buf->page_len - len); + offset = buf->page_len - len; + copied = xdr_shrink_pagelen(buf, offset); + trace_rpc_xdr_alignment(xdr, offset, copied); xdr->nwords = XDR_QUADLEN(buf->len - cur); } return len; -- cgit v1.2.3 From 80125d4ae70aff049b170dd21ea5d007694d629a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:32 -0500 Subject: SUNRPC: Remove some dprintk() call sites from auth functions Clean up: Reduce dprintk noise by removing dprintk() call sites from hot path that do not report exceptions. These are usually replaceable with function graph tracing. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 29 ----------------------------- net/sunrpc/auth_unix.c | 9 +-------- 2 files changed, 1 insertion(+), 37 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8dfab6119e6a..275e84e817b7 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -17,10 +17,6 @@ #include #include -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - #define RPC_CREDCACHE_DEFAULT_HASHBITS (4) struct rpc_cred_cache { struct hlist_head *hashtable; @@ -267,8 +263,6 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size) } } rcu_read_unlock(); - - dprintk("RPC: %s returns %d\n", __func__, result); return result; } EXPORT_SYMBOL_GPL(rpcauth_list_flavors); @@ -636,9 +630,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) struct rpc_cred *ret; const struct cred *cred = current_cred(); - dprintk("RPC: looking up %s cred\n", - auth->au_ops->au_name); - memset(&acred, 0, sizeof(acred)); acred.cred = cred; ret = auth->au_ops->lookup_cred(auth, &acred, flags); @@ -670,8 +661,6 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) }; struct rpc_cred *ret; - dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); put_cred(acred.cred); return ret; @@ -688,8 +677,6 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags) if (!acred.principal) return NULL; - dprintk("RPC: %5u looking up %s machine cred\n", - task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); return auth->au_ops->lookup_cred(auth, &acred, lookupflags); } @@ -698,8 +685,6 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; - dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, auth->au_ops->au_name); return rpcauth_lookupcred(auth, lookupflags); } @@ -776,9 +761,6 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; - dprintk("RPC: %5u marshaling %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); - return cred->cr_ops->crmarshal(task, p); } @@ -787,9 +769,6 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; - dprintk("RPC: %5u validating %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); - return cred->cr_ops->crvalidate(task, p); } @@ -808,8 +787,6 @@ rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, { struct rpc_cred *cred = task->tk_rqstp->rq_cred; - dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", - task->tk_pid, cred->cr_ops->cr_name, cred); if (cred->cr_ops->crwrap_req) return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); /* By default, we encode the arguments normally. */ @@ -833,8 +810,6 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, { struct rpc_cred *cred = task->tk_rqstp->rq_cred; - dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", - task->tk_pid, cred->cr_ops->cr_name, cred); if (cred->cr_ops->crunwrap_resp) return cred->cr_ops->crunwrap_resp(task, decode, rqstp, data, obj); @@ -865,8 +840,6 @@ rpcauth_refreshcred(struct rpc_task *task) goto out; cred = task->tk_rqstp->rq_cred; } - dprintk("RPC: %5u refreshing %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); out: @@ -880,8 +853,6 @@ rpcauth_invalcred(struct rpc_task *task) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; - dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); if (cred) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 387f6b3ffbea..fc8a59134640 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -28,8 +28,6 @@ static mempool_t *unix_pool; static struct rpc_auth * unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { - dprintk("RPC: creating UNIX authenticator for client %p\n", - clnt); refcount_inc(&unix_auth.au_count); return &unix_auth; } @@ -37,7 +35,6 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) static void unx_destroy(struct rpc_auth *auth) { - dprintk("RPC: destroying UNIX authenticator %p\n", auth); } /* @@ -48,10 +45,6 @@ unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); - dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", - from_kuid(&init_user_ns, acred->cred->fsuid), - from_kgid(&init_user_ns, acred->cred->fsgid)); - rpcauth_init_cred(ret, acred, auth, &unix_credops); ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; return ret; @@ -61,7 +54,7 @@ static void unx_free_cred_callback(struct rcu_head *head) { struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu); - dprintk("RPC: unx_free_cred %p\n", rpc_cred); + put_cred(rpc_cred->cr_cred); mempool_free(rpc_cred, unix_pool); } -- cgit v1.2.3 From 067fb11b12af1448f7bbcacca41e470cb775e9fa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:37 -0500 Subject: SUNRPC: Remove rpc_xprt::tsh_size tsh_size was added to accommodate transports that send a pre-amble before each RPC message. However, this assumes the pre-amble is fixed in size, which isn't true for some transports. That makes tsh_size not very generic. Also I'd like to make the estimation of RPC send and receive buffer sizes more precise. tsh_size doesn't currently appear to be accounted for at all by call_allocate. Therefore let's just remove the tsh_size concept, and make the only transports that have a non-zero tsh_size employ a direct approach. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 7 --- net/sunrpc/auth_gss/auth_gss.c | 3 +- net/sunrpc/clnt.c | 1 - net/sunrpc/svc.c | 19 ++----- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1 - net/sunrpc/xprtrdma/transport.c | 1 - net/sunrpc/xprtsock.c | 91 ++++++++++++++++++++---------- 7 files changed, 65 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ad7e910b119d..3a391544299e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -196,8 +196,6 @@ struct rpc_xprt { size_t max_payload; /* largest RPC payload size, in bytes */ - unsigned int tsh_size; /* size of transport specific - header */ struct rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ @@ -362,11 +360,6 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int max_req); void xprt_free(struct rpc_xprt *); -static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) -{ - return p + xprt->tsh_size; -} - static inline int xprt_enable_swap(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index a42672e81792..4b52e2b11c58 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1563,8 +1563,7 @@ gss_marshal(struct rpc_task *task, __be32 *p) /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ - iov.iov_base = xprt_skip_transport_header(req->rq_xprt, - req->rq_snd_buf.head[0].iov_base); + iov.iov_base = req->rq_snd_buf.head[0].iov_base; iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d7ec6132c046..c4203f6138ef 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2331,7 +2331,6 @@ rpc_encode_header(struct rpc_task *task) /* FIXME: check buffer size? */ - p = xprt_skip_transport_header(req->rq_xprt, p); *p++ = req->rq_xid; /* XID */ *p++ = htonl(RPC_CALL); /* CALL */ *p++ = htonl(RPC_VERSION); /* RPC version */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e87ddb9f7feb..dbd19697ee38 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1144,17 +1144,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif -/* - * Setup response header for TCP, it has a 4B record length field. - */ -static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) -{ - struct kvec *resv = &rqstp->rq_res.head[0]; - - /* tcp needs a space for the record length... */ - svc_putnl(resv, 0); -} - /* * Common routine for processing the RPC request. */ @@ -1182,10 +1171,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); clear_bit(RQ_DROPME, &rqstp->rq_flags); - /* Setup reply header */ - if (rqstp->rq_prot == IPPROTO_TCP) - svc_tcp_prep_reply_hdr(rqstp); - svc_putu32(resv, rqstp->rq_xid); vers = svc_getnl(argv); @@ -1443,6 +1428,10 @@ svc_process(struct svc_rqst *rqstp) goto out_drop; } + /* Reserve space for the record marker */ + if (rqstp->rq_prot == IPPROTO_TCP) + svc_putnl(resv, 0); + /* Returns 1 for send, 0 for drop */ if (likely(svc_process_common(rqstp, argv, resv))) return svc_send(rqstp); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index b908f2ca08fd..907464c2a9f0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -304,7 +304,6 @@ xprt_setup_rdma_bc(struct xprt_create *args) xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->prot = XPRT_TRANSPORT_BC_RDMA; - xprt->tsh_size = 0; xprt->ops = &xprt_rdma_bc_procs; memcpy(&xprt->addr, args->dstaddr, args->addrlen); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index fbc171ebfe91..e7274dc10120 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -332,7 +332,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->resvport = 0; /* privileged port not needed */ - xprt->tsh_size = 0; /* RPC-RDMA handles framing */ xprt->ops = &xprt_rdma_procs; /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7754aa3e434f..ae09d850cd11 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -696,6 +696,40 @@ xs_stream_reset_connect(struct sock_xprt *transport) #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) +/* Common case: + * - stream transport + * - sending from byte 0 of the message + * - the message is wholly contained in @xdr's head iovec + */ +static int xs_send_rm_and_kvec(struct socket *sock, struct xdr_buf *xdr, + unsigned int remainder) +{ + struct msghdr msg = { + .msg_flags = XS_SENDMSG_FLAGS | (remainder ? MSG_MORE : 0) + }; + rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | + (u32)xdr->len); + struct kvec iov[2] = { + { + .iov_base = &marker, + .iov_len = sizeof(marker) + }, + { + .iov_base = xdr->head[0].iov_base, + .iov_len = xdr->head[0].iov_len + }, + }; + int ret; + + ret = kernel_sendmsg(sock, &msg, iov, 2, + iov[0].iov_len + iov[1].iov_len); + if (ret < 0) + return ret; + if (ret < iov[0].iov_len) + return -EPIPE; + return ret - iov[0].iov_len; +} + static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) { struct msghdr msg = { @@ -779,7 +813,11 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (base < xdr->head[0].iov_len || addr != NULL) { unsigned int len = xdr->head[0].iov_len - base; remainder -= len; - err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); + if (!base && !addr) + err = xs_send_rm_and_kvec(sock, xdr, remainder); + else + err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], + base, remainder != 0); if (remainder == 0 || err != len) goto out; *sent_p += err; @@ -869,16 +907,6 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req) return transport->xmit.offset != 0 && req->rq_bytes_sent == 0; } -/* - * Construct a stream transport record marker in @buf. - */ -static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) -{ - u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); -} - /** * xs_local_send_request - write an RPC request to an AF_LOCAL socket * @req: pointer to RPC request @@ -905,8 +933,6 @@ static int xs_local_send_request(struct rpc_rqst *req) return -ENOTCONN; } - xs_encode_stream_record_marker(&req->rq_snd_buf); - xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); @@ -1057,8 +1083,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req) return -ENOTCONN; } - xs_encode_stream_record_marker(&req->rq_snd_buf); - xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); @@ -2534,26 +2558,35 @@ static int bc_sendto(struct rpc_rqst *req) { int len; struct xdr_buf *xbufp = &req->rq_snd_buf; - struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct socket *sock = transport->sock; + container_of(req->rq_xprt, struct sock_xprt, xprt); unsigned long headoff; unsigned long tailoff; + struct page *tailpage; + struct msghdr msg = { + .msg_flags = MSG_MORE + }; + rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | + (u32)xbufp->len); + struct kvec iov = { + .iov_base = &marker, + .iov_len = sizeof(marker), + }; - xs_encode_stream_record_marker(xbufp); + len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len); + if (len != iov.iov_len) + return -EAGAIN; + tailpage = NULL; + if (xbufp->tail[0].iov_len) + tailpage = virt_to_page(xbufp->tail[0].iov_base); tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; - len = svc_send_common(sock, xbufp, + len = svc_send_common(transport->sock, xbufp, virt_to_page(xbufp->head[0].iov_base), headoff, - xbufp->tail[0].iov_base, tailoff); - - if (len != xbufp->len) { - printk(KERN_NOTICE "Error sending entire callback!\n"); - len = -EAGAIN; - } - + tailpage, tailoff); + if (len != xbufp->len) + return -EAGAIN; return len; } @@ -2793,7 +2826,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = 0; - xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->bind_timeout = XS_BIND_TO; @@ -2862,7 +2894,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_UDP; - xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -2942,7 +2973,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_TCP; - xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->bind_timeout = XS_BIND_TO; @@ -3015,7 +3045,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_TCP; - xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->timeout = &xs_tcp_default_timeout; -- cgit v1.2.3 From fe9a270519c72bccb3af524db7ea6c7b67700d50 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:43 -0500 Subject: SUNRPC: Add build option to disable support for insecure enctypes Enable distributions to enforce the rejection of ancient and insecure Kerberos enctypes in the kernel's RPCSEC_GSS implementation. These are the single-DES encryption types that were deprecated in 2012 by RFC 6649. Enctypes that were deprecated more recently (by RFC 8429) remain fully supported for now because they are still likely to be widely used. Signed-off-by: Chuck Lever Acked-by: Simo Sorce Signed-off-by: Anna Schumaker --- include/linux/sunrpc/gss_krb5_enctypes.h | 42 +++++++++++++++++++++++++++++++- net/sunrpc/Kconfig | 16 ++++++++++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 2 ++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h index ec6234eee89c..981c89cef19d 100644 --- a/include/linux/sunrpc/gss_krb5_enctypes.h +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -1,4 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Dumb way to share this static piece of information with nfsd + * Define the string that exports the set of kernel-supported + * Kerberos enctypes. This list is sent via upcall to gssd, and + * is also exposed via the nfsd /proc API. The consumers generally + * treat this as an ordered list, where the first item in the list + * is the most preferred. + */ + +#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H +#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H + +#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES + +/* + * NB: This list includes encryption types that were deprecated + * by RFC 8429 (DES3_CBC_SHA1 and ARCFOUR_HMAC). + * + * ENCTYPE_AES256_CTS_HMAC_SHA1_96 + * ENCTYPE_AES128_CTS_HMAC_SHA1_96 + * ENCTYPE_DES3_CBC_SHA1 + * ENCTYPE_ARCFOUR_HMAC + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23" + +#else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ + +/* + * NB: This list includes encryption types that were deprecated + * by RFC 8429 and RFC 6649. + * + * ENCTYPE_AES256_CTS_HMAC_SHA1_96 + * ENCTYPE_AES128_CTS_HMAC_SHA1_96 + * ENCTYPE_DES3_CBC_SHA1 + * ENCTYPE_ARCFOUR_HMAC + * ENCTYPE_DES_CBC_MD5 + * ENCTYPE_DES_CBC_CRC + * ENCTYPE_DES_CBC_MD4 */ #define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" + +#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ + +#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */ diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index ac09ca803296..83f5617bae07 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -34,6 +34,22 @@ config RPCSEC_GSS_KRB5 If unsure, say Y. +config CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES + bool "Secure RPC: Disable insecure Kerberos encryption types" + depends on RPCSEC_GSS_KRB5 + default n + help + Choose Y here to disable the use of deprecated encryption types + with the Kerberos version 5 GSS-API mechanism (RFC 1964). The + deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC, + and DES-CBC-MD4. These types were deprecated by RFC 6649 because + they were found to be insecure. + + N is the default because many sites have deployed KDCs and + keytabs that contain only these deprecated encryption types. + Choosing Y prevents the use of known-insecure encryption types + but might result in compatibility problems. + config SUNRPC_DEBUG bool "RPC: Enable dprintk debugging" depends on SUNRPC && SYSCTL diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index eab71fc7af3e..be31a58d54e0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -53,6 +53,7 @@ static struct gss_api_mech gss_kerberos_mech; /* forward declaration */ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { +#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES /* * DES (All DES enctypes are mapped to the same gss functionality) */ @@ -74,6 +75,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .cksumlength = 8, .keyed_cksum = 0, }, +#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ /* * RC4-HMAC */ -- cgit v1.2.3 From e8680a24a269bd6dcb533f4e4a5faba9ae58925c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:48 -0500 Subject: SUNRPC: Use struct xdr_stream when constructing RPC Call header Modernize and harden the code path that constructs each RPC Call message. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 15 ++-- include/linux/sunrpc/xdr.h | 6 ++ include/trace/events/sunrpc.h | 29 +++++++ net/sunrpc/auth.c | 56 ++++++++---- net/sunrpc/auth_gss/auth_gss.c | 191 ++++++++++++++++++++--------------------- net/sunrpc/auth_null.c | 23 +++-- net/sunrpc/auth_unix.c | 61 ++++++++----- net/sunrpc/clnt.c | 66 +++++++------- 8 files changed, 266 insertions(+), 181 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index eed3cb16ccf1..96e237f8e60b 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -131,11 +131,12 @@ struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); - __be32 * (*crmarshal)(struct rpc_task *, __be32 *); + int (*crmarshal)(struct rpc_task *task, + struct xdr_stream *xdr); int (*crrefresh)(struct rpc_task *); __be32 * (*crvalidate)(struct rpc_task *, __be32 *); - int (*crwrap_req)(struct rpc_task *, kxdreproc_t, - void *, __be32 *, void *); + int (*crwrap_req)(struct rpc_task *task, + struct xdr_stream *xdr); int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t, void *, __be32 *, void *); int (*crkey_timeout)(struct rpc_cred *); @@ -165,9 +166,13 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred * void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); void put_rpccred(struct rpc_cred *); -__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); +int rpcauth_marshcred(struct rpc_task *task, + struct xdr_stream *xdr); __be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); -int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj); +int rpcauth_wrap_req_encode(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_wrap_req(struct rpc_task *task, + struct xdr_stream *xdr); int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); bool rpcauth_xmit_need_reencode(struct rpc_task *task); int rpcauth_refreshcred(struct rpc_task *); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 787939d13643..6df9ac1ca471 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -87,6 +87,12 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define xdr_one cpu_to_be32(1) #define xdr_two cpu_to_be32(2) +#define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL) +#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX) +#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS) + +#define rpc_call cpu_to_be32(RPC_CALL) + #define rpc_success cpu_to_be32(RPC_SUCCESS) #define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) #define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 627650800676..2b3f9d139e75 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -213,6 +213,35 @@ DECLARE_EVENT_CLASS(rpc_task_queued, DEFINE_RPC_QUEUED_EVENT(sleep); DEFINE_RPC_QUEUED_EVENT(wakeup); +DECLARE_EVENT_CLASS(rpc_failure, + + TP_PROTO(const struct rpc_task *task), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + ), + + TP_printk("task:%u@%u", + __entry->task_id, __entry->client_id) +); + +#define DEFINE_RPC_FAILURE(name) \ + DEFINE_EVENT(rpc_failure, rpc_bad_##name, \ + TP_PROTO( \ + const struct rpc_task *task \ + ), \ + TP_ARGS(task)) + +DEFINE_RPC_FAILURE(callhdr); + TRACE_EVENT(rpc_stats_latency, TP_PROTO( diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 275e84e817b7..add2135d9b01 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -756,12 +756,21 @@ destroy: } EXPORT_SYMBOL_GPL(put_rpccred); -__be32 * -rpcauth_marshcred(struct rpc_task *task, __be32 *p) +/** + * rpcauth_marshcred - Append RPC credential to end of @xdr + * @task: controlling RPC task + * @xdr: xdr_stream containing initial portion of RPC Call header + * + * On success, an appropriate verifier is added to @xdr, @xdr is + * updated to point past the verifier, and zero is returned. + * Otherwise, @xdr is in an undefined state and a negative errno + * is returned. + */ +int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - return cred->cr_ops->crmarshal(task, p); + return ops->crmarshal(task, xdr); } __be32 * @@ -772,26 +781,37 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p) return cred->cr_ops->crvalidate(task, p); } -static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *data, void *obj) +/** + * rpcauth_wrap_req_encode - XDR encode the RPC procedure + * @task: controlling RPC task + * @xdr: stream where on-the-wire bytes are to be marshalled + * + * On success, @xdr contains the encoded and wrapped message. + * Otherwise, @xdr is in an undefined state. + */ +int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr) { - struct xdr_stream xdr; + kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode; - xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data, rqstp); - encode(rqstp, &xdr, obj); + encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp); + return 0; } +EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode); -int -rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, - __be32 *data, void *obj) +/** + * rpcauth_wrap_req - XDR encode and wrap the RPC procedure + * @task: controlling RPC task + * @xdr: stream where on-the-wire bytes are to be marshalled + * + * On success, @xdr contains the encoded and wrapped message, + * and zero is returned. Otherwise, @xdr is in an undefined + * state and a negative errno is returned. + */ +int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - if (cred->cr_ops->crwrap_req) - return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); - /* By default, we encode the arguments normally. */ - rpcauth_wrap_req_encode(encode, rqstp, data, obj); - return 0; + return ops->crwrap_req(task, xdr); } static int diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4b52e2b11c58..b333b1bdad45 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1526,18 +1526,20 @@ out: } /* -* Marshal credentials. -* Maybe we should keep a cached credential for performance reasons. -*/ -static __be32 * -gss_marshal(struct rpc_task *task, __be32 *p) + * Marshal credentials. + * + * The expensive part is computing the verifier. We can't cache a + * pre-computed version of the verifier because the seqno, which + * is different every time, is included in the MIC. + */ +static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 *cred_len; + __be32 *p, *cred_len; u32 maj_stat = 0; struct xdr_netobj mic; struct kvec iov; @@ -1545,7 +1547,13 @@ gss_marshal(struct rpc_task *task, __be32 *p) dprintk("RPC: %5u %s\n", task->tk_pid, __func__); - *p++ = htonl(RPC_AUTH_GSS); + /* Credential */ + + p = xdr_reserve_space(xdr, 7 * sizeof(*p) + + ctx->gc_wire_ctx.len); + if (!p) + goto out_put_ctx; + *p++ = rpc_auth_gss; cred_len = p++; spin_lock(&ctx->gc_seq_lock); @@ -1554,12 +1562,14 @@ gss_marshal(struct rpc_task *task, __be32 *p) if (req->rq_seqno == MAXSEQ) goto out_expired; - *p++ = htonl((u32) RPC_GSS_VERSION); - *p++ = htonl((u32) ctx->gc_proc); - *p++ = htonl((u32) req->rq_seqno); - *p++ = htonl((u32) gss_cred->gc_service); + *p++ = cpu_to_be32(RPC_GSS_VERSION); + *p++ = cpu_to_be32(ctx->gc_proc); + *p++ = cpu_to_be32(req->rq_seqno); + *p++ = cpu_to_be32(gss_cred->gc_service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); - *cred_len = htonl((p - (cred_len + 1)) << 2); + *cred_len = cpu_to_be32((p - (cred_len + 1)) << 2); + + /* Verifier */ /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ @@ -1567,27 +1577,27 @@ gss_marshal(struct rpc_task *task, __be32 *p) iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); - /* set verifier flavor*/ - *p++ = htonl(RPC_AUTH_GSS); - + p = xdr_reserve_space(xdr, sizeof(*p)); + if (!p) + goto out_put_ctx; + *p++ = rpc_auth_gss; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); - if (maj_stat == GSS_S_CONTEXT_EXPIRED) { + if (maj_stat == GSS_S_CONTEXT_EXPIRED) goto out_expired; - } else if (maj_stat != 0) { - pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); - task->tk_status = -EIO; + else if (maj_stat != 0) + goto out_put_ctx; + if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) goto out_put_ctx; - } - p = xdr_encode_opaque(p, NULL, mic.len); gss_put_ctx(ctx); - return p; + return 0; out_expired: + gss_put_ctx(ctx); clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - task->tk_status = -EKEYEXPIRED; + return -EKEYEXPIRED; out_put_ctx: gss_put_ctx(ctx); - return NULL; + return -EMSGSIZE; } static int gss_renew_cred(struct rpc_task *task) @@ -1716,61 +1726,45 @@ out_bad: return ret; } -static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) -{ - struct xdr_stream xdr; - - xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p, rqstp); - encode(rqstp, &xdr, obj); -} - -static inline int -gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) +static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + struct rpc_task *task, struct xdr_stream *xdr) { - struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; - struct xdr_buf integ_buf; - __be32 *integ_len = NULL; + struct rpc_rqst *rqstp = task->tk_rqstp; + struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf; struct xdr_netobj mic; - u32 offset; - __be32 *q; - struct kvec *iov; - u32 maj_stat = 0; - int status = -EIO; + __be32 *p, *integ_len; + u32 offset, maj_stat; + p = xdr_reserve_space(xdr, 2 * sizeof(*p)); + if (!p) + goto wrap_failed; integ_len = p++; - offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(rqstp->rq_seqno); + *p = cpu_to_be32(rqstp->rq_seqno); - gss_wrap_req_encode(encode, rqstp, p, obj); + if (rpcauth_wrap_req_encode(task, xdr)) + goto wrap_failed; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; if (xdr_buf_subsegment(snd_buf, &integ_buf, offset, snd_buf->len - offset)) - return status; - *integ_len = htonl(integ_buf.len); + goto wrap_failed; + *integ_len = cpu_to_be32(integ_buf.len); - /* guess whether we're in the head or the tail: */ - if (snd_buf->page_len || snd_buf->tail[0].iov_len) - iov = snd_buf->tail; - else - iov = snd_buf->head; - p = iov->iov_base + iov->iov_len; + p = xdr_reserve_space(xdr, 0); + if (!p) + goto wrap_failed; mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); - status = -EIO; /* XXX? */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) - return status; - q = xdr_encode_opaque(p, NULL, mic.len); - - offset = (u8 *)q - (u8 *)p; - iov->iov_len += offset; - snd_buf->len += offset; + goto wrap_failed; + /* Check that the trailing MIC fit in the buffer, after the fact */ + if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) + goto wrap_failed; return 0; +wrap_failed: + return -EMSGSIZE; } static void @@ -1821,61 +1815,63 @@ out: return -EAGAIN; } -static inline int -gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdreproc_t encode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) +static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + struct rpc_task *task, struct xdr_stream *xdr) { + struct rpc_rqst *rqstp = task->tk_rqstp; struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; - u32 offset; - u32 maj_stat; + u32 pad, offset, maj_stat; int status; - __be32 *opaque_len; + __be32 *p, *opaque_len; struct page **inpages; int first; - int pad; struct kvec *iov; - char *tmp; + status = -EIO; + p = xdr_reserve_space(xdr, 2 * sizeof(*p)); + if (!p) + goto wrap_failed; opaque_len = p++; - offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(rqstp->rq_seqno); + *p = cpu_to_be32(rqstp->rq_seqno); - gss_wrap_req_encode(encode, rqstp, p, obj); + if (rpcauth_wrap_req_encode(task, xdr)) + goto wrap_failed; status = alloc_enc_pages(rqstp); - if (status) - return status; + if (unlikely(status)) + goto wrap_failed; first = snd_buf->page_base >> PAGE_SHIFT; inpages = snd_buf->pages + first; snd_buf->pages = rqstp->rq_enc_pages; snd_buf->page_base -= first << PAGE_SHIFT; /* - * Give the tail its own page, in case we need extra space in the - * head when wrapping: + * Move the tail into its own page, in case gss_wrap needs + * more space in the head when wrapping. * - * call_allocate() allocates twice the slack space required - * by the authentication flavor to rq_callsize. - * For GSS, slack is GSS_CRED_SLACK. + * Still... Why can't gss_wrap just slide the tail down? */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) { + char *tmp; + tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); snd_buf->tail[0].iov_base = tmp; } + status = -EIO; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ - BUG_ON(snd_buf->len > snd_buf->buflen); - status = -EIO; + if (unlikely(snd_buf->len > snd_buf->buflen)) + goto wrap_failed; /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) - return status; + goto wrap_failed; - *opaque_len = htonl(snd_buf->len - offset); - /* guess whether we're in the head or the tail: */ + *opaque_len = cpu_to_be32(snd_buf->len - offset); + /* guess whether the pad goes into the head or the tail: */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) iov = snd_buf->tail; else @@ -1887,37 +1883,36 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, snd_buf->len += pad; return 0; +wrap_failed: + return status; } -static int -gss_wrap_req(struct rpc_task *task, - kxdreproc_t encode, void *rqstp, __be32 *p, void *obj) +static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - int status = -EIO; + int status; dprintk("RPC: %5u %s\n", task->tk_pid, __func__); + status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) { /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ - gss_wrap_req_encode(encode, rqstp, p, obj); - status = 0; + status = rpcauth_wrap_req_encode(task, xdr); goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: - gss_wrap_req_encode(encode, rqstp, p, obj); - status = 0; + status = rpcauth_wrap_req_encode(task, xdr); break; case RPC_GSS_SVC_INTEGRITY: - status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj); + status = gss_wrap_req_integ(cred, ctx, task, xdr); break; case RPC_GSS_SVC_PRIVACY: - status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj); + status = gss_wrap_req_priv(cred, ctx, task, xdr); break; } out: diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index d0ceac57c06e..797f8472c21b 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -59,15 +59,21 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) /* * Marshal credential. */ -static __be32 * -nul_marshal(struct rpc_task *task, __be32 *p) +static int +nul_marshal(struct rpc_task *task, struct xdr_stream *xdr) { - *p++ = htonl(RPC_AUTH_NULL); - *p++ = 0; - *p++ = htonl(RPC_AUTH_NULL); - *p++ = 0; - - return p; + __be32 *p; + + p = xdr_reserve_space(xdr, 4 * sizeof(*p)); + if (!p) + return -EMSGSIZE; + /* Credential */ + *p++ = rpc_auth_null; + *p++ = xdr_zero; + /* Verifier */ + *p++ = rpc_auth_null; + *p = xdr_zero; + return 0; } /* @@ -125,6 +131,7 @@ const struct rpc_credops null_credops = { .crdestroy = nul_destroy_cred, .crmatch = nul_match, .crmarshal = nul_marshal, + .crwrap_req = rpcauth_wrap_req_encode, .crrefresh = nul_refresh, .crvalidate = nul_validate, }; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index fc8a59134640..1d5b7ed9c6f7 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -99,37 +99,55 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static __be32 * -unx_marshal(struct rpc_task *task, __be32 *p) +static int +unx_marshal(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_clnt *clnt = task->tk_client; struct rpc_cred *cred = task->tk_rqstp->rq_cred; - __be32 *base, *hold; + __be32 *p, *cred_len, *gidarr_len; int i; struct group_info *gi = cred->cr_cred->group_info; - *p++ = htonl(RPC_AUTH_UNIX); - base = p++; - *p++ = htonl(jiffies/HZ); - - /* - * Copy the UTS nodename captured when the client was created. - */ - p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); - - *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid)); - *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid)); - hold = p++; + /* Credential */ + + p = xdr_reserve_space(xdr, 3 * sizeof(*p)); + if (!p) + goto marshal_failed; + *p++ = rpc_auth_unix; + cred_len = p++; + *p++ = xdr_zero; /* stamp */ + if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename, + clnt->cl_nodelen) < 0) + goto marshal_failed; + p = xdr_reserve_space(xdr, 3 * sizeof(*p)); + if (!p) + goto marshal_failed; + *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid)); + *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid)); + + gidarr_len = p++; if (gi) for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) - *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i])); - *hold = htonl(p - hold - 1); /* gid array length */ - *base = htonl((p - base - 1) << 2); /* cred length */ + *p++ = cpu_to_be32(from_kgid(&init_user_ns, + gi->gid[i])); + *gidarr_len = cpu_to_be32(p - gidarr_len - 1); + *cred_len = cpu_to_be32((p - cred_len - 1) << 2); + p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2); + if (!p) + goto marshal_failed; + + /* Verifier */ + + p = xdr_reserve_space(xdr, 2 * sizeof(*p)); + if (!p) + goto marshal_failed; + *p++ = rpc_auth_null; + *p = xdr_zero; - *p++ = htonl(RPC_AUTH_NULL); - *p++ = htonl(0); + return 0; - return p; +marshal_failed: + return -EMSGSIZE; } /* @@ -202,6 +220,7 @@ const struct rpc_credops unix_credops = { .crdestroy = unx_destroy_cred, .crmatch = unx_match, .crmarshal = unx_marshal, + .crwrap_req = rpcauth_wrap_req_encode, .crrefresh = unx_refresh, .crvalidate = unx_validate, }; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c4203f6138ef..d6750b7f169a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -77,7 +77,8 @@ static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static __be32 *rpc_encode_header(struct rpc_task *task); +static int rpc_encode_header(struct rpc_task *task, + struct xdr_stream *xdr); static __be32 *rpc_verify_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt); @@ -1728,10 +1729,7 @@ static void rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - kxdreproc_t encode; - __be32 *p; - - dprint_status(task); + struct xdr_stream xdr; xdr_buf_init(&req->rq_snd_buf, req->rq_buffer, @@ -1740,18 +1738,13 @@ rpc_xdr_encode(struct rpc_task *task) req->rq_rbuffer, req->rq_rcvsize); - p = rpc_encode_header(task); - if (p == NULL) + req->rq_snd_buf.head[0].iov_len = 0; + xdr_init_encode(&xdr, &req->rq_snd_buf, + req->rq_snd_buf.head[0].iov_base, req); + if (rpc_encode_header(task, &xdr)) return; - encode = task->tk_msg.rpc_proc->p_encode; - if (encode == NULL) - return; - - task->tk_status = rpcauth_wrap_req(task, encode, req, p, - task->tk_msg.rpc_argp); - if (task->tk_status == 0) - xprt_request_prepare(req); + task->tk_status = rpcauth_wrap_req(task, &xdr); } /* @@ -1762,6 +1755,7 @@ call_encode(struct rpc_task *task) { if (!rpc_task_need_encode(task)) goto out; + dprint_status(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ rpc_xdr_encode(task); /* Did the encode result in an error condition? */ @@ -1779,6 +1773,8 @@ call_encode(struct rpc_task *task) rpc_exit(task, task->tk_status); } return; + } else { + xprt_request_prepare(task->tk_rqstp); } /* Add task to reply queue before transmission to avoid races */ @@ -2322,25 +2318,33 @@ out_retry: } } -static __be32 * -rpc_encode_header(struct rpc_task *task) +static int +rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - __be32 *p = req->rq_svec[0].iov_base; - - /* FIXME: check buffer size? */ - - *p++ = req->rq_xid; /* XID */ - *p++ = htonl(RPC_CALL); /* CALL */ - *p++ = htonl(RPC_VERSION); /* RPC version */ - *p++ = htonl(clnt->cl_prog); /* program number */ - *p++ = htonl(clnt->cl_vers); /* program version */ - *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ - p = rpcauth_marshcred(task, p); - if (p) - req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); - return p; + __be32 *p; + int error; + + error = -EMSGSIZE; + p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2); + if (!p) + goto out_fail; + *p++ = req->rq_xid; + *p++ = rpc_call; + *p++ = cpu_to_be32(RPC_VERSION); + *p++ = cpu_to_be32(clnt->cl_prog); + *p++ = cpu_to_be32(clnt->cl_vers); + *p = cpu_to_be32(task->tk_msg.rpc_proc->p_proc); + + error = rpcauth_marshcred(task, xdr); + if (error < 0) + goto out_fail; + return 0; +out_fail: + trace_rpc_bad_callhdr(task); + rpc_exit(task, error); + return error; } static __be32 * -- cgit v1.2.3 From 7f5667a5f8c4ff85b14ccce9d41f9244bd30ab68 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:53 -0500 Subject: SUNRPC: Clean up rpc_verify_header() - Recover some instruction count because I'm about to introduce a few xdr_inline_decode call sites - Replace dprintk() call sites with trace points - Reduce the hot path so it fits in fewer cachelines I've also renamed it rpc_decode_header() to match everything else in the RPC client. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 7 +- include/trace/events/sunrpc.h | 52 ++++++++++ net/sunrpc/clnt.c | 223 ++++++++++++++++++------------------------ 3 files changed, 154 insertions(+), 128 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 6df9ac1ca471..c54041950cc0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -92,6 +92,9 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS) #define rpc_call cpu_to_be32(RPC_CALL) +#define rpc_reply cpu_to_be32(RPC_REPLY) + +#define rpc_msg_accepted cpu_to_be32(RPC_MSG_ACCEPTED) #define rpc_success cpu_to_be32(RPC_SUCCESS) #define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) @@ -101,6 +104,9 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR) #define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY) +#define rpc_mismatch cpu_to_be32(RPC_MISMATCH) +#define rpc_auth_error cpu_to_be32(RPC_AUTH_ERROR) + #define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK) #define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED) #define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED) @@ -109,7 +115,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) #define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) #define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) -#define rpc_autherr_oldseqnum cpu_to_be32(101) /* * Miscellaneous XDR helper functions diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2b3f9d139e75..0654e9c50371 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -241,6 +241,58 @@ DECLARE_EVENT_CLASS(rpc_failure, TP_ARGS(task)) DEFINE_RPC_FAILURE(callhdr); +DEFINE_RPC_FAILURE(verifier); + +DECLARE_EVENT_CLASS(rpc_reply_event, + + TP_PROTO( + const struct rpc_task *task + ), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __string(progname, task->tk_client->cl_program->name) + __field(u32, version) + __string(procname, rpc_proc_name(task)) + __string(servername, task->tk_xprt->servername) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __assign_str(progname, task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procname, rpc_proc_name(task)) + __assign_str(servername, task->tk_xprt->servername) + ), + + TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s", + __entry->task_id, __entry->client_id, __get_str(servername), + __entry->xid, __get_str(progname), __entry->version, + __get_str(procname)) +) + +#define DEFINE_RPC_REPLY_EVENT(name) \ + DEFINE_EVENT(rpc_reply_event, rpc__##name, \ + TP_PROTO( \ + const struct rpc_task *task \ + ), \ + TP_ARGS(task)) + +DEFINE_RPC_REPLY_EVENT(prog_unavail); +DEFINE_RPC_REPLY_EVENT(prog_mismatch); +DEFINE_RPC_REPLY_EVENT(proc_unavail); +DEFINE_RPC_REPLY_EVENT(garbage_args); +DEFINE_RPC_REPLY_EVENT(unparsable); +DEFINE_RPC_REPLY_EVENT(mismatch); +DEFINE_RPC_REPLY_EVENT(stale_creds); +DEFINE_RPC_REPLY_EVENT(bad_creds); +DEFINE_RPC_REPLY_EVENT(auth_tooweak); TRACE_EVENT(rpc_stats_latency, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d6750b7f169a..e9735089bd66 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,7 +79,7 @@ static void call_connect_status(struct rpc_task *task); static int rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr); -static __be32 *rpc_verify_header(struct rpc_task *task); +static __be32 *rpc_decode_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) @@ -2292,7 +2292,7 @@ call_decode(struct rpc_task *task) goto out_retry; } - p = rpc_verify_header(task); + p = rpc_decode_header(task); if (IS_ERR(p)) { if (p == ERR_PTR(-EAGAIN)) goto out_retry; @@ -2308,7 +2308,7 @@ call_decode(struct rpc_task *task) return; out_retry: task->tk_status = 0; - /* Note: rpc_verify_header() may have freed the RPC slot */ + /* Note: rpc_decode_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { xdr_free_bvec(&req->rq_rcv_buf); req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; @@ -2347,164 +2347,133 @@ out_fail: return error; } -static __be32 * -rpc_verify_header(struct rpc_task *task) +static noinline __be32 * +rpc_decode_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; __be32 *p = iov->iov_base; - u32 n; int error = -EACCES; - if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { - /* RFC-1014 says that the representation of XDR data must be a - * multiple of four bytes - * - if it isn't pointer subtraction in the NFS client may give - * undefined results - */ - dprintk("RPC: %5u %s: XDR representation not a multiple of" - " 4 bytes: 0x%x\n", task->tk_pid, __func__, - task->tk_rqstp->rq_rcv_buf.len); - error = -EIO; - goto out_err; - } + /* RFC-1014 says that the representation of XDR data must be a + * multiple of four bytes + * - if it isn't pointer subtraction in the NFS client may give + * undefined results + */ + if (task->tk_rqstp->rq_rcv_buf.len & 3) + goto out_badlen; if ((len -= 3) < 0) - goto out_overflow; + goto out_unparsable; - p += 1; /* skip XID */ - if ((n = ntohl(*p++)) != RPC_REPLY) { - dprintk("RPC: %5u %s: not an RPC reply: %x\n", - task->tk_pid, __func__, n); - error = -EIO; - goto out_garbage; - } + p++; /* skip XID */ + if (*p++ != rpc_reply) + goto out_unparsable; + if (*p++ != rpc_msg_accepted) + goto out_msg_denied; - if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { - if (--len < 0) - goto out_overflow; - switch ((n = ntohl(*p++))) { - case RPC_AUTH_ERROR: - break; - case RPC_MISMATCH: - dprintk("RPC: %5u %s: RPC call version mismatch!\n", - task->tk_pid, __func__); - error = -EPROTONOSUPPORT; - goto out_err; - default: - dprintk("RPC: %5u %s: RPC call rejected, " - "unknown error: %x\n", - task->tk_pid, __func__, n); - error = -EIO; - goto out_err; - } - if (--len < 0) - goto out_overflow; - switch ((n = ntohl(*p++))) { - case RPC_AUTH_REJECTEDCRED: - case RPC_AUTH_REJECTEDVERF: - case RPCSEC_GSS_CREDPROBLEM: - case RPCSEC_GSS_CTXPROBLEM: - if (!task->tk_cred_retry) - break; - task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry stale creds\n", - task->tk_pid, __func__); - rpcauth_invalcred(task); - /* Ensure we obtain a new XID! */ - xprt_release(task); - task->tk_action = call_reserve; - goto out_retry; - case RPC_AUTH_BADCRED: - case RPC_AUTH_BADVERF: - /* possibly garbled cred/verf? */ - if (!task->tk_garb_retry) - break; - task->tk_garb_retry--; - dprintk("RPC: %5u %s: retry garbled creds\n", - task->tk_pid, __func__); - task->tk_action = call_encode; - goto out_retry; - case RPC_AUTH_TOOWEAK: - printk(KERN_NOTICE "RPC: server %s requires stronger " - "authentication.\n", - task->tk_xprt->servername); - break; - default: - dprintk("RPC: %5u %s: unknown auth error: %x\n", - task->tk_pid, __func__, n); - error = -EIO; - } - dprintk("RPC: %5u %s: call rejected %d\n", - task->tk_pid, __func__, n); - goto out_err; - } p = rpcauth_checkverf(task, p); - if (IS_ERR(p)) { - error = PTR_ERR(p); - dprintk("RPC: %5u %s: auth check failed with %d\n", - task->tk_pid, __func__, error); - goto out_garbage; /* bad verifier, retry */ - } + if (IS_ERR(p)) + goto out_verifier; + len = p - (__be32 *)iov->iov_base - 1; if (len < 0) - goto out_overflow; - switch ((n = ntohl(*p++))) { - case RPC_SUCCESS: + goto out_unparsable; + switch (*p++) { + case rpc_success: return p; - case RPC_PROG_UNAVAIL: - dprintk("RPC: %5u %s: program %u is unsupported " - "by server %s\n", task->tk_pid, __func__, - (unsigned int)clnt->cl_prog, - task->tk_xprt->servername); + case rpc_prog_unavail: + trace_rpc__prog_unavail(task); error = -EPFNOSUPPORT; goto out_err; - case RPC_PROG_MISMATCH: - dprintk("RPC: %5u %s: program %u, version %u unsupported " - "by server %s\n", task->tk_pid, __func__, - (unsigned int)clnt->cl_prog, - (unsigned int)clnt->cl_vers, - task->tk_xprt->servername); + case rpc_prog_mismatch: + trace_rpc__prog_mismatch(task); error = -EPROTONOSUPPORT; goto out_err; - case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %s unsupported by program %u, " - "version %u on server %s\n", - task->tk_pid, __func__, - rpc_proc_name(task), - clnt->cl_prog, clnt->cl_vers, - task->tk_xprt->servername); + case rpc_proc_unavail: + trace_rpc__proc_unavail(task); error = -EOPNOTSUPP; goto out_err; - case RPC_GARBAGE_ARGS: - dprintk("RPC: %5u %s: server saw garbage\n", - task->tk_pid, __func__); - break; /* retry */ + case rpc_garbage_args: + trace_rpc__garbage_args(task); + break; default: - dprintk("RPC: %5u %s: server accept status: %x\n", - task->tk_pid, __func__, n); - /* Also retry */ + trace_rpc__unparsable(task); } out_garbage: clnt->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; - dprintk("RPC: %5u %s: retrying\n", - task->tk_pid, __func__); task->tk_action = call_encode; -out_retry: return ERR_PTR(-EAGAIN); } out_err: rpc_exit(task, error); - dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, - __func__, error); return ERR_PTR(error); -out_overflow: - dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid, - __func__); + +out_badlen: + trace_rpc__unparsable(task); + error = -EIO; + goto out_err; + +out_unparsable: + trace_rpc__unparsable(task); + error = -EIO; goto out_garbage; + +out_verifier: + trace_rpc_bad_verifier(task); + error = PTR_ERR(p); + goto out_garbage; + +out_msg_denied: + switch (*p++) { + case rpc_auth_error: + break; + case rpc_mismatch: + trace_rpc__mismatch(task); + error = -EPROTONOSUPPORT; + goto out_err; + default: + trace_rpc__unparsable(task); + error = -EIO; + goto out_err; + } + + switch (*p++) { + case rpc_autherr_rejectedcred: + case rpc_autherr_rejectedverf: + case rpcsec_gsserr_credproblem: + case rpcsec_gsserr_ctxproblem: + if (!task->tk_cred_retry) + break; + task->tk_cred_retry--; + trace_rpc__stale_creds(task); + rpcauth_invalcred(task); + /* Ensure we obtain a new XID! */ + xprt_release(task); + task->tk_action = call_reserve; + return ERR_PTR(-EAGAIN); + case rpc_autherr_badcred: + case rpc_autherr_badverf: + /* possibly garbled cred/verf? */ + if (!task->tk_garb_retry) + break; + task->tk_garb_retry--; + trace_rpc__bad_creds(task); + task->tk_action = call_encode; + return ERR_PTR(-EAGAIN); + case rpc_autherr_tooweak: + trace_rpc__auth_tooweak(task); + pr_warn("RPC: server %s requires stronger authentication.\n", + task->tk_xprt->servername); + break; + default: + trace_rpc__unparsable(task); + error = -EIO; + } + goto out_err; } static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr, -- cgit v1.2.3 From a0584ee9aed805446b044ce855e67264f0dc619e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:24:58 -0500 Subject: SUNRPC: Use struct xdr_stream when decoding RPC Reply header Modernize and harden the code path that parses an RPC Reply message. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 15 ++- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/auth.c | 63 ++++++++----- net/sunrpc/auth_gss/auth_gss.c | 204 ++++++++++++++++++++++------------------- net/sunrpc/auth_null.c | 31 +++---- net/sunrpc/auth_unix.c | 42 +++++---- net/sunrpc/clnt.c | 88 +++++++++--------- 7 files changed, 243 insertions(+), 201 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 96e237f8e60b..c51e1893f77e 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -134,11 +134,12 @@ struct rpc_credops { int (*crmarshal)(struct rpc_task *task, struct xdr_stream *xdr); int (*crrefresh)(struct rpc_task *); - __be32 * (*crvalidate)(struct rpc_task *, __be32 *); + int (*crvalidate)(struct rpc_task *task, + struct xdr_stream *xdr); int (*crwrap_req)(struct rpc_task *task, struct xdr_stream *xdr); - int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t, - void *, __be32 *, void *); + int (*crunwrap_resp)(struct rpc_task *task, + struct xdr_stream *xdr); int (*crkey_timeout)(struct rpc_cred *); char * (*crstringify_acceptor)(struct rpc_cred *); bool (*crneed_reencode)(struct rpc_task *); @@ -168,12 +169,16 @@ struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); void put_rpccred(struct rpc_cred *); int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr); -__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); +int rpcauth_checkverf(struct rpc_task *task, + struct xdr_stream *xdr); int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr); int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr); -int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); +int rpcauth_unwrap_resp_decode(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_unwrap_resp(struct rpc_task *task, + struct xdr_stream *xdr); bool rpcauth_xmit_need_reencode(struct rpc_task *task); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index c54041950cc0..65af6a204b75 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -89,6 +89,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL) #define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX) +#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT) #define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS) #define rpc_call cpu_to_be32(RPC_CALL) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index add2135d9b01..e7861026b9e5 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -17,6 +17,8 @@ #include #include +#include + #define RPC_CREDCACHE_DEFAULT_HASHBITS (4) struct rpc_cred_cache { struct hlist_head *hashtable; @@ -773,14 +775,6 @@ int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr) return ops->crmarshal(task, xdr); } -__be32 * -rpcauth_checkverf(struct rpc_task *task, __be32 *p) -{ - struct rpc_cred *cred = task->tk_rqstp->rq_cred; - - return cred->cr_ops->crvalidate(task, p); -} - /** * rpcauth_wrap_req_encode - XDR encode the RPC procedure * @task: controlling RPC task @@ -814,27 +808,52 @@ int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) return ops->crwrap_req(task, xdr); } -static int -rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, - __be32 *data, void *obj) +/** + * rpcauth_checkverf - Validate verifier in RPC Reply header + * @task: controlling RPC task + * @xdr: xdr_stream containing RPC Reply header + * + * On success, @xdr is updated to point past the verifier and + * zero is returned. Otherwise, @xdr is in an undefined state + * and a negative errno is returned. + */ +int +rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr) { - struct xdr_stream xdr; + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data, rqstp); - return decode(rqstp, &xdr, obj); + return ops->crvalidate(task, xdr); } +/** + * rpcauth_unwrap_resp_decode - Invoke XDR decode function + * @task: controlling RPC task + * @xdr: stream where the Reply message resides + * + * Returns zero on success; otherwise a negative errno is returned. + */ int -rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, - __be32 *data, void *obj) +rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; + + return decode(task->tk_rqstp, xdr, task->tk_msg.rpc_resp); +} +EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode); + +/** + * rpcauth_unwrap_resp - Invoke unwrap and decode function for the cred + * @task: controlling RPC task + * @xdr: stream where the Reply message resides + * + * Returns zero on success; otherwise a negative errno is returned. + */ +int +rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) +{ + const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops; - if (cred->cr_ops->crunwrap_resp) - return cred->cr_ops->crunwrap_resp(task, decode, rqstp, - data, obj); - /* By default, we decode the arguments normally. */ - return rpcauth_unwrap_req_decode(decode, rqstp, data, obj); + return ops->crunwrap_resp(task, xdr); } bool diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b333b1bdad45..206788e8b787 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1671,59 +1671,62 @@ gss_refresh_null(struct rpc_task *task) return 0; } -static __be32 * -gss_validate(struct rpc_task *task, __be32 *p) +static int +gss_validate(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 *seq = NULL; + __be32 *p, *seq = NULL; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; - u32 flav,len; - u32 maj_stat; - __be32 *ret = ERR_PTR(-EIO); + u32 len, maj_stat; + int status; - dprintk("RPC: %5u %s\n", task->tk_pid, __func__); + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (!p) + goto validate_failed; + if (*p++ != rpc_auth_gss) + goto validate_failed; + len = be32_to_cpup(p); + if (len > RPC_MAX_AUTH_SIZE) + goto validate_failed; + p = xdr_inline_decode(xdr, len); + if (!p) + goto validate_failed; - flav = ntohl(*p++); - if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) - goto out_bad; - if (flav != RPC_AUTH_GSS) - goto out_bad; seq = kmalloc(4, GFP_NOFS); if (!seq) - goto out_bad; - *seq = htonl(task->tk_rqstp->rq_seqno); + goto validate_failed; + *seq = cpu_to_be32(task->tk_rqstp->rq_seqno); iov.iov_base = seq; iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_buf); mic.data = (u8 *)p; mic.len = len; - - ret = ERR_PTR(-EACCES); maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - if (maj_stat) { - dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n", - task->tk_pid, __func__, maj_stat); - goto out_bad; - } + if (maj_stat) + goto bad_mic; + /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; + status = 0; +out: gss_put_ctx(ctx); - dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n", - task->tk_pid, __func__); - kfree(seq); - return p + XDR_QUADLEN(len); -out_bad: - gss_put_ctx(ctx); - dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__, - PTR_ERR(ret)); kfree(seq); - return ret; + return status; + +validate_failed: + status = -EIO; + goto out; +bad_mic: + dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n", + task->tk_pid, __func__, maj_stat); + status = -EACCES; + goto out; } static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, @@ -1921,79 +1924,98 @@ out: return status; } -static inline int +static int +gss_unwrap_resp_auth(struct rpc_cred *cred) +{ + cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize; + return 0; +} + +static int gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, __be32 **p) + struct rpc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; - struct xdr_buf integ_buf; + struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf; + u32 data_offset, mic_offset, integ_len, maj_stat; struct xdr_netobj mic; - u32 data_offset, mic_offset; - u32 integ_len; - u32 maj_stat; - int status = -EIO; + __be32 *p; - integ_len = ntohl(*(*p)++); + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (unlikely(!p)) + goto unwrap_failed; + integ_len = be32_to_cpup(p++); if (integ_len & 3) - return status; - data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; + goto unwrap_failed; + data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base; mic_offset = integ_len + data_offset; if (mic_offset > rcv_buf->len) - return status; - if (ntohl(*(*p)++) != rqstp->rq_seqno) - return status; - - if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, - mic_offset - data_offset)) - return status; + goto unwrap_failed; + if (be32_to_cpup(p) != rqstp->rq_seqno) + goto unwrap_failed; + if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len)) + goto unwrap_failed; if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) - return status; - + goto unwrap_failed; maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) - return status; + goto bad_mic; + + cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 + + 1 + XDR_QUADLEN(mic.len); return 0; +unwrap_failed: + return -EIO; +bad_mic: + dprintk("RPC: %s: gss_verify_mic returned error 0x%08x\n", + __func__, maj_stat); + return -EIO; } -static inline int +static int gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, __be32 **p) -{ - struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; - u32 offset; - u32 opaque_len; - u32 maj_stat; - int status = -EIO; - - opaque_len = ntohl(*(*p)++); - offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; + struct rpc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; + struct kvec *head = rqstp->rq_rcv_buf.head; + unsigned int savedlen = rcv_buf->len; + u32 offset, opaque_len, maj_stat; + __be32 *p; + + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (unlikely(!p)) + goto unwrap_failed; + opaque_len = be32_to_cpup(p++); + offset = (u8 *)(p) - (u8 *)head->iov_base; if (offset + opaque_len > rcv_buf->len) - return status; - /* remove padding: */ + goto unwrap_failed; rcv_buf->len = offset + opaque_len; maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) - return status; - if (ntohl(*(*p)++) != rqstp->rq_seqno) - return status; - - return 0; -} + goto bad_unwrap; + /* gss_unwrap decrypted the sequence number */ + if (be32_to_cpup(p++) != rqstp->rq_seqno) + goto unwrap_failed; -static int -gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, - __be32 *p, void *obj) -{ - struct xdr_stream xdr; + /* gss_unwrap redacts the opaque blob from the head iovec. + * rcv_buf has changed, thus the stream needs to be reset. + */ + xdr_init_decode(xdr, rcv_buf, p, rqstp); - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p, rqstp); - return decode(rqstp, &xdr, obj); + cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 + + XDR_QUADLEN(savedlen - rcv_buf->len); + return 0; +unwrap_failed: + return -EIO; +bad_unwrap: + dprintk("RPC: %s: gss_unwrap returned error 0x%08x\n", + __func__, maj_stat); + return -EIO; } static bool @@ -2037,39 +2059,33 @@ out: } static int -gss_unwrap_resp(struct rpc_task *task, - kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj) +gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) { - struct rpc_cred *cred = task->tk_rqstp->rq_cred; + struct rpc_rqst *rqstp = task->tk_rqstp; + struct rpc_cred *cred = rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 *savedp = p; - struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; - int savedlen = head->iov_len; - int status = -EIO; + int status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) goto out_decode; switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: + status = gss_unwrap_resp_auth(cred); break; case RPC_GSS_SVC_INTEGRITY: - status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p); - if (status) - goto out; + status = gss_unwrap_resp_integ(cred, ctx, rqstp, xdr); break; case RPC_GSS_SVC_PRIVACY: - status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); - if (status) - goto out; + status = gss_unwrap_resp_priv(cred, ctx, rqstp, xdr); break; } - /* take into account extra slack for integrity and privacy cases: */ - cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) - + (savedlen - head->iov_len); + if (status) + goto out; + out_decode: - status = gss_unwrap_req_decode(decode, rqstp, p, obj); + status = rpcauth_unwrap_resp_decode(task, xdr); out: gss_put_ctx(ctx); dprintk("RPC: %5u %s returning %d\n", diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 797f8472c21b..bf96975ffc4b 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -86,25 +86,19 @@ nul_refresh(struct rpc_task *task) return 0; } -static __be32 * -nul_validate(struct rpc_task *task, __be32 *p) +static int +nul_validate(struct rpc_task *task, struct xdr_stream *xdr) { - rpc_authflavor_t flavor; - u32 size; - - flavor = ntohl(*p++); - if (flavor != RPC_AUTH_NULL) { - printk("RPC: bad verf flavor: %u\n", flavor); - return ERR_PTR(-EIO); - } - - size = ntohl(*p++); - if (size != 0) { - printk("RPC: bad verf size: %u\n", size); - return ERR_PTR(-EIO); - } - - return p; + __be32 *p; + + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (!p) + return -EIO; + if (*p++ != rpc_auth_null) + return -EIO; + if (*p != xdr_zero) + return -EIO; + return 0; } const struct rpc_authops authnull_ops = { @@ -134,6 +128,7 @@ const struct rpc_credops null_credops = { .crwrap_req = rpcauth_wrap_req_encode, .crrefresh = nul_refresh, .crvalidate = nul_validate, + .crunwrap_resp = rpcauth_unwrap_resp_decode, }; static diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 1d5b7ed9c6f7..5ea84a96f96e 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -160,29 +160,32 @@ unx_refresh(struct rpc_task *task) return 0; } -static __be32 * -unx_validate(struct rpc_task *task, __be32 *p) +static int +unx_validate(struct rpc_task *task, struct xdr_stream *xdr) { - rpc_authflavor_t flavor; - u32 size; - - flavor = ntohl(*p++); - if (flavor != RPC_AUTH_NULL && - flavor != RPC_AUTH_UNIX && - flavor != RPC_AUTH_SHORT) { - printk("RPC: bad verf flavor: %u\n", flavor); - return ERR_PTR(-EIO); - } + __be32 *p; + u32 size; - size = ntohl(*p++); - if (size > RPC_MAX_AUTH_SIZE) { - printk("RPC: giant verf size: %u\n", size); - return ERR_PTR(-EIO); + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (!p) + return -EIO; + switch (*p++) { + case rpc_auth_null: + case rpc_auth_unix: + case rpc_auth_short: + break; + default: + return -EIO; } - task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; - p += (size >> 2); + size = be32_to_cpup(p); + if (size > RPC_MAX_AUTH_SIZE) + return -EIO; + p = xdr_inline_decode(xdr, size); + if (!p) + return -EIO; - return p; + task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; + return 0; } int __init rpc_init_authunix(void) @@ -223,4 +226,5 @@ const struct rpc_credops unix_credops = { .crwrap_req = rpcauth_wrap_req_encode, .crrefresh = unx_refresh, .crvalidate = unx_validate, + .crunwrap_resp = rpcauth_unwrap_resp_decode, }; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e9735089bd66..803e93105af1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,7 +79,8 @@ static void call_connect_status(struct rpc_task *task); static int rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr); -static __be32 *rpc_decode_header(struct rpc_task *task); +static int rpc_decode_header(struct rpc_task *task, + struct xdr_stream *xdr); static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) @@ -2251,12 +2252,11 @@ call_decode(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; - __be32 *p; + struct xdr_stream xdr; dprint_status(task); - if (!decode) { + if (!task->tk_msg.rpc_proc->p_decode) { task->tk_action = rpc_exit_task; return; } @@ -2292,29 +2292,27 @@ call_decode(struct rpc_task *task) goto out_retry; } - p = rpc_decode_header(task); - if (IS_ERR(p)) { - if (p == ERR_PTR(-EAGAIN)) - goto out_retry; + xdr_init_decode(&xdr, &req->rq_rcv_buf, + req->rq_rcv_buf.head[0].iov_base, req); + switch (rpc_decode_header(task, &xdr)) { + case 0: + task->tk_action = rpc_exit_task; + task->tk_status = rpcauth_unwrap_resp(task, &xdr); + dprintk("RPC: %5u %s result %d\n", + task->tk_pid, __func__, task->tk_status); return; - } - task->tk_action = rpc_exit_task; - - task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, - task->tk_msg.rpc_resp); - - dprintk("RPC: %5u call_decode result %d\n", task->tk_pid, - task->tk_status); - return; + case -EAGAIN: out_retry: - task->tk_status = 0; - /* Note: rpc_decode_header() may have freed the RPC slot */ - if (task->tk_rqstp == req) { - xdr_free_bvec(&req->rq_rcv_buf); - req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; - if (task->tk_client->cl_discrtry) - xprt_conditional_disconnect(req->rq_xprt, - req->rq_connect_cookie); + task->tk_status = 0; + /* Note: rpc_decode_header() may have freed the RPC slot */ + if (task->tk_rqstp == req) { + xdr_free_bvec(&req->rq_rcv_buf); + req->rq_reply_bytes_recvd = 0; + req->rq_rcv_buf.len = 0; + if (task->tk_client->cl_discrtry) + xprt_conditional_disconnect(req->rq_xprt, + req->rq_connect_cookie); + } } } @@ -2347,14 +2345,12 @@ out_fail: return error; } -static noinline __be32 * -rpc_decode_header(struct rpc_task *task) +static noinline int +rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_clnt *clnt = task->tk_client; - struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; - int len = task->tk_rqstp->rq_rcv_buf.len >> 2; - __be32 *p = iov->iov_base; int error = -EACCES; + __be32 *p; /* RFC-1014 says that the representation of XDR data must be a * multiple of four bytes @@ -2363,25 +2359,26 @@ rpc_decode_header(struct rpc_task *task) */ if (task->tk_rqstp->rq_rcv_buf.len & 3) goto out_badlen; - if ((len -= 3) < 0) - goto out_unparsable; + p = xdr_inline_decode(xdr, 3 * sizeof(*p)); + if (!p) + goto out_unparsable; p++; /* skip XID */ if (*p++ != rpc_reply) goto out_unparsable; if (*p++ != rpc_msg_accepted) goto out_msg_denied; - p = rpcauth_checkverf(task, p); - if (IS_ERR(p)) + error = rpcauth_checkverf(task, xdr); + if (error) goto out_verifier; - len = p - (__be32 *)iov->iov_base - 1; - if (len < 0) + p = xdr_inline_decode(xdr, sizeof(*p)); + if (!p) goto out_unparsable; - switch (*p++) { + switch (*p) { case rpc_success: - return p; + return 0; case rpc_prog_unavail: trace_rpc__prog_unavail(task); error = -EPFNOSUPPORT; @@ -2406,11 +2403,11 @@ out_garbage: if (task->tk_garb_retry) { task->tk_garb_retry--; task->tk_action = call_encode; - return ERR_PTR(-EAGAIN); + return -EAGAIN; } out_err: rpc_exit(task, error); - return ERR_PTR(error); + return error; out_badlen: trace_rpc__unparsable(task); @@ -2424,10 +2421,12 @@ out_unparsable: out_verifier: trace_rpc_bad_verifier(task); - error = PTR_ERR(p); goto out_garbage; out_msg_denied: + p = xdr_inline_decode(xdr, sizeof(*p)); + if (!p) + goto out_unparsable; switch (*p++) { case rpc_auth_error: break; @@ -2441,6 +2440,9 @@ out_msg_denied: goto out_err; } + p = xdr_inline_decode(xdr, sizeof(*p)); + if (!p) + goto out_unparsable; switch (*p++) { case rpc_autherr_rejectedcred: case rpc_autherr_rejectedverf: @@ -2454,7 +2456,7 @@ out_msg_denied: /* Ensure we obtain a new XID! */ xprt_release(task); task->tk_action = call_reserve; - return ERR_PTR(-EAGAIN); + return -EAGAIN; case rpc_autherr_badcred: case rpc_autherr_badverf: /* possibly garbled cred/verf? */ @@ -2463,7 +2465,7 @@ out_msg_denied: task->tk_garb_retry--; trace_rpc__bad_creds(task); task->tk_action = call_encode; - return ERR_PTR(-EAGAIN); + return -EAGAIN; case rpc_autherr_tooweak: trace_rpc__auth_tooweak(task); pr_warn("RPC: server %s requires stronger authentication.\n", -- cgit v1.2.3 From 0c77668ddb4e7bdfbca462c6185d154d0b8889ae Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:04 -0500 Subject: SUNRPC: Introduce trace points in rpc_auth_gss.ko Add infrastructure for trace points in the RPC_AUTH_GSS kernel module, and add a few sample trace points. These report exceptional or unexpected events, and observe the assignment of GSS sequence numbers. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcgss.h | 361 +++++++++++++++++++++++++++++++++++++++++ include/trace/events/rpcrdma.h | 12 +- include/trace/events/sunrpc.h | 61 ++++++- net/sunrpc/auth_gss/Makefile | 2 +- net/sunrpc/auth_gss/auth_gss.c | 165 +++++++++---------- net/sunrpc/auth_gss/trace.c | 11 ++ net/sunrpc/xprt.c | 10 +- 7 files changed, 530 insertions(+), 92 deletions(-) create mode 100644 include/trace/events/rpcgss.h create mode 100644 net/sunrpc/auth_gss/trace.c (limited to 'net') diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h new file mode 100644 index 000000000000..d1f7fe1b6fe4 --- /dev/null +++ b/include/trace/events/rpcgss.h @@ -0,0 +1,361 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018 Oracle. All rights reserved. + * + * Trace point definitions for the "rpcgss" subsystem. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rpcgss + +#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RPCGSS_H + +#include + +/** + ** GSS-API related trace events + **/ + +TRACE_DEFINE_ENUM(GSS_S_BAD_MECH); +TRACE_DEFINE_ENUM(GSS_S_BAD_NAME); +TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE); +TRACE_DEFINE_ENUM(GSS_S_BAD_BINDINGS); +TRACE_DEFINE_ENUM(GSS_S_BAD_STATUS); +TRACE_DEFINE_ENUM(GSS_S_BAD_SIG); +TRACE_DEFINE_ENUM(GSS_S_NO_CRED); +TRACE_DEFINE_ENUM(GSS_S_NO_CONTEXT); +TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_CREDENTIAL); +TRACE_DEFINE_ENUM(GSS_S_CREDENTIALS_EXPIRED); +TRACE_DEFINE_ENUM(GSS_S_CONTEXT_EXPIRED); +TRACE_DEFINE_ENUM(GSS_S_FAILURE); +TRACE_DEFINE_ENUM(GSS_S_BAD_QOP); +TRACE_DEFINE_ENUM(GSS_S_UNAUTHORIZED); +TRACE_DEFINE_ENUM(GSS_S_UNAVAILABLE); +TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_ELEMENT); +TRACE_DEFINE_ENUM(GSS_S_NAME_NOT_MN); +TRACE_DEFINE_ENUM(GSS_S_CONTINUE_NEEDED); +TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_OLD_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN); +TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN); + +#define show_gss_status(x) \ + __print_flags(x, "|", \ + { GSS_S_BAD_MECH, "GSS_S_BAD_MECH" }, \ + { GSS_S_BAD_NAME, "GSS_S_BAD_NAME" }, \ + { GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" }, \ + { GSS_S_BAD_BINDINGS, "GSS_S_BAD_BINDINGS" }, \ + { GSS_S_BAD_STATUS, "GSS_S_BAD_STATUS" }, \ + { GSS_S_BAD_SIG, "GSS_S_BAD_SIG" }, \ + { GSS_S_NO_CRED, "GSS_S_NO_CRED" }, \ + { GSS_S_NO_CONTEXT, "GSS_S_NO_CONTEXT" }, \ + { GSS_S_DEFECTIVE_TOKEN, "GSS_S_DEFECTIVE_TOKEN" }, \ + { GSS_S_DEFECTIVE_CREDENTIAL, "GSS_S_DEFECTIVE_CREDENTIAL" }, \ + { GSS_S_CREDENTIALS_EXPIRED, "GSS_S_CREDENTIALS_EXPIRED" }, \ + { GSS_S_CONTEXT_EXPIRED, "GSS_S_CONTEXT_EXPIRED" }, \ + { GSS_S_FAILURE, "GSS_S_FAILURE" }, \ + { GSS_S_BAD_QOP, "GSS_S_BAD_QOP" }, \ + { GSS_S_UNAUTHORIZED, "GSS_S_UNAUTHORIZED" }, \ + { GSS_S_UNAVAILABLE, "GSS_S_UNAVAILABLE" }, \ + { GSS_S_DUPLICATE_ELEMENT, "GSS_S_DUPLICATE_ELEMENT" }, \ + { GSS_S_NAME_NOT_MN, "GSS_S_NAME_NOT_MN" }, \ + { GSS_S_CONTINUE_NEEDED, "GSS_S_CONTINUE_NEEDED" }, \ + { GSS_S_DUPLICATE_TOKEN, "GSS_S_DUPLICATE_TOKEN" }, \ + { GSS_S_OLD_TOKEN, "GSS_S_OLD_TOKEN" }, \ + { GSS_S_UNSEQ_TOKEN, "GSS_S_UNSEQ_TOKEN" }, \ + { GSS_S_GAP_TOKEN, "GSS_S_GAP_TOKEN" }) + + +DECLARE_EVENT_CLASS(rpcgss_gssapi_event, + TP_PROTO( + const struct rpc_task *task, + u32 maj_stat + ), + + TP_ARGS(task, maj_stat), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, maj_stat) + + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->maj_stat = maj_stat; + ), + + TP_printk("task:%u@%u maj_stat=%s", + __entry->task_id, __entry->client_id, + __entry->maj_stat == 0 ? + "GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat)) +); + +#define DEFINE_GSSAPI_EVENT(name) \ + DEFINE_EVENT(rpcgss_gssapi_event, rpcgss_##name, \ + TP_PROTO( \ + const struct rpc_task *task, \ + u32 maj_stat \ + ), \ + TP_ARGS(task, maj_stat)) + +TRACE_EVENT(rpcgss_import_ctx, + TP_PROTO( + int status + ), + + TP_ARGS(status), + + TP_STRUCT__entry( + __field(int, status) + ), + + TP_fast_assign( + __entry->status = status; + ), + + TP_printk("status=%d", __entry->status) +); + +DEFINE_GSSAPI_EVENT(get_mic); +DEFINE_GSSAPI_EVENT(verify_mic); +DEFINE_GSSAPI_EVENT(wrap); +DEFINE_GSSAPI_EVENT(unwrap); + + +/** + ** GSS auth unwrap failures + **/ + +TRACE_EVENT(rpcgss_unwrap_failed, + TP_PROTO( + const struct rpc_task *task + ), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + ), + + TP_printk("task:%u@%u", __entry->task_id, __entry->client_id) +); + +TRACE_EVENT(rpcgss_bad_seqno, + TP_PROTO( + const struct rpc_task *task, + u32 expected, + u32 received + ), + + TP_ARGS(task, expected, received), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, expected) + __field(u32, received) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->expected = expected; + __entry->received = received; + ), + + TP_printk("task:%u@%u expected seqno %u, received seqno %u", + __entry->task_id, __entry->client_id, + __entry->expected, __entry->received) +); + +TRACE_EVENT(rpcgss_seqno, + TP_PROTO( + const struct rpc_task *task + ), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seqno) + ), + + TP_fast_assign( + const struct rpc_rqst *rqst = task->tk_rqstp; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->seqno = rqst->rq_seqno; + ), + + TP_printk("task:%u@%u xid=0x%08x seqno=%u", + __entry->task_id, __entry->client_id, + __entry->xid, __entry->seqno) +); + +TRACE_EVENT(rpcgss_need_reencode, + TP_PROTO( + const struct rpc_task *task, + u32 seq_xmit, + bool ret + ), + + TP_ARGS(task, seq_xmit, ret), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seq_xmit) + __field(u32, seqno) + __field(bool, ret) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __entry->seq_xmit = seq_xmit; + __entry->seqno = task->tk_rqstp->rq_seqno; + __entry->ret = ret; + ), + + TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", + __entry->task_id, __entry->client_id, + __entry->xid, __entry->seqno, __entry->seq_xmit, + __entry->ret ? "" : "un") +); + +/** + ** gssd upcall related trace events + **/ + +TRACE_EVENT(rpcgss_upcall_msg, + TP_PROTO( + const char *buf + ), + + TP_ARGS(buf), + + TP_STRUCT__entry( + __string(msg, buf) + ), + + TP_fast_assign( + __assign_str(msg, buf) + ), + + TP_printk("msg='%s'", __get_str(msg)) +); + +TRACE_EVENT(rpcgss_upcall_result, + TP_PROTO( + u32 uid, + int result + ), + + TP_ARGS(uid, result), + + TP_STRUCT__entry( + __field(u32, uid) + __field(int, result) + + ), + + TP_fast_assign( + __entry->uid = uid; + __entry->result = result; + ), + + TP_printk("for uid %u, result=%d", __entry->uid, __entry->result) +); + +TRACE_EVENT(rpcgss_context, + TP_PROTO( + unsigned long expiry, + unsigned long now, + unsigned int timeout, + unsigned int len, + const u8 *data + ), + + TP_ARGS(expiry, now, timeout, len, data), + + TP_STRUCT__entry( + __field(unsigned long, expiry) + __field(unsigned long, now) + __field(unsigned int, timeout) + __field(int, len) + __string(acceptor, data) + ), + + TP_fast_assign( + __entry->expiry = expiry; + __entry->now = now; + __entry->timeout = timeout; + __entry->len = len; + strncpy(__get_str(acceptor), data, len); + ), + + TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s", + __entry->expiry, __entry->now, __entry->timeout, + __entry->len, __get_str(acceptor)) +); + + +/** + ** Miscellaneous events + */ + +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P); + +#define show_pseudoflavor(x) \ + __print_symbolic(x, \ + { RPC_AUTH_GSS_KRB5, "RPC_AUTH_GSS_KRB5" }, \ + { RPC_AUTH_GSS_KRB5I, "RPC_AUTH_GSS_KRB5I" }, \ + { RPC_AUTH_GSS_KRB5P, "RPC_AUTH_GSS_KRB5P" }) + + +TRACE_EVENT(rpcgss_createauth, + TP_PROTO( + unsigned int flavor, + int error + ), + + TP_ARGS(flavor, error), + + TP_STRUCT__entry( + __field(unsigned int, flavor) + __field(int, error) + + ), + + TP_fast_assign( + __entry->flavor = flavor; + __entry->error = error; + ), + + TP_printk("flavor=%s error=%d", + show_pseudoflavor(__entry->flavor), __entry->error) +); + + +#endif /* _TRACE_RPCGSS_H */ + +#include diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 399b1aedc927..962975b4313f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -521,12 +521,18 @@ TRACE_EVENT(xprtrdma_post_send, TP_STRUCT__entry( __field(const void *, req) + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(int, num_sge) __field(int, signaled) __field(int, status) ), TP_fast_assign( + const struct rpc_rqst *rqst = &req->rl_slot; + + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; __entry->req = req; __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; __entry->signaled = req->rl_sendctx->sc_wr.send_flags & @@ -534,9 +540,11 @@ TRACE_EVENT(xprtrdma_post_send, __entry->status = status; ), - TP_printk("req=%p, %d SGEs%s, status=%d", + TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d", + __entry->task_id, __entry->client_id, __entry->req, __entry->num_sge, - (__entry->signaled ? ", signaled" : ""), + (__entry->num_sge == 1 ? "" : "s"), + (__entry->signaled ? "signaled " : ""), __entry->status ) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 0654e9c50371..e58dda8e038c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -655,9 +655,68 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, DEFINE_RPC_XPRT_EVENT(timer); DEFINE_RPC_XPRT_EVENT(lookup_rqst); -DEFINE_RPC_XPRT_EVENT(transmit); DEFINE_RPC_XPRT_EVENT(complete_rqst); +TRACE_EVENT(xprt_transmit, + TP_PROTO( + const struct rpc_rqst *rqst, + int status + ), + + TP_ARGS(rqst, status), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seqno) + __field(int, status) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->seqno = rqst->rq_seqno; + __entry->status = status; + ), + + TP_printk( + "task:%u@%u xid=0x%08x seqno=%u status=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->seqno, __entry->status) +); + +TRACE_EVENT(xprt_enq_xmit, + TP_PROTO( + const struct rpc_task *task, + int stage + ), + + TP_ARGS(task, stage), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(u32, seqno) + __field(int, stage) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __entry->seqno = task->tk_rqstp->rq_seqno; + __entry->stage = stage; + ), + + TP_printk( + "task:%u@%u xid=0x%08x seqno=%u stage=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->seqno, __entry->stage) +); + TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index c374268b008f..4a29f4c5dac4 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o auth_rpcgss-y := auth_gss.o gss_generic_token.o \ gss_mech_switch.o svcauth_gss.o \ - gss_rpc_upcall.o gss_rpc_xdr.o + gss_rpc_upcall.o gss_rpc_xdr.o trace.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 206788e8b787..3d1fbd6d3370 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -55,6 +55,8 @@ #include "../netns.h" +#include + static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; @@ -260,6 +262,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct } ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS); if (ret < 0) { + trace_rpcgss_import_ctx(ret); p = ERR_PTR(ret); goto err; } @@ -275,12 +278,9 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct if (IS_ERR(p)) goto err; done: - dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n", - __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, - ctx->gc_acceptor.data); - return p; + trace_rpcgss_context(ctx->gc_expiry, now, timeout, + ctx->gc_acceptor.len, ctx->gc_acceptor.data); err: - dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); return p; } @@ -354,10 +354,8 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth if (auth && pos->auth->service != auth->service) continue; refcount_inc(&pos->count); - dprintk("RPC: %s found msg %p\n", __func__, pos); return pos; } - dprintk("RPC: %s found nothing\n", __func__); return NULL; } @@ -456,7 +454,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, size_t buflen = sizeof(gss_msg->databuf); int len; - len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name, + len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name, from_kuid(&init_user_ns, gss_msg->uid)); buflen -= len; p += len; @@ -467,7 +465,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, * identity that we are authenticating to. */ if (target_name) { - len = scnprintf(p, buflen, "target=%s ", target_name); + len = scnprintf(p, buflen, " target=%s", target_name); buflen -= len; p += len; gss_msg->msg.len += len; @@ -487,11 +485,11 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, char *c = strchr(service_name, '@'); if (!c) - len = scnprintf(p, buflen, "service=%s ", + len = scnprintf(p, buflen, " service=%s", service_name); else len = scnprintf(p, buflen, - "service=%.*s srchost=%s ", + " service=%.*s srchost=%s", (int)(c - service_name), service_name, c + 1); buflen -= len; @@ -500,17 +498,17 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, } if (mech->gm_upcall_enctypes) { - len = scnprintf(p, buflen, "enctypes=%s ", + len = scnprintf(p, buflen, " enctypes=%s", mech->gm_upcall_enctypes); buflen -= len; p += len; gss_msg->msg.len += len; } + trace_rpcgss_upcall_msg(gss_msg->databuf); len = scnprintf(p, buflen, "\n"); if (len == 0) goto out_overflow; gss_msg->msg.len += len; - gss_msg->msg.data = gss_msg->databuf; return 0; out_overflow: @@ -603,8 +601,6 @@ gss_refresh_upcall(struct rpc_task *task) struct rpc_pipe *pipe; int err = 0; - dprintk("RPC: %5u %s for uid %u\n", - task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid)); gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we @@ -612,7 +608,8 @@ gss_refresh_upcall(struct rpc_task *task) warn_gssd(); task->tk_timeout = 15*HZ; rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL); - return -EAGAIN; + err = -EAGAIN; + goto out; } if (IS_ERR(gss_msg)) { err = PTR_ERR(gss_msg); @@ -635,9 +632,8 @@ gss_refresh_upcall(struct rpc_task *task) spin_unlock(&pipe->lock); gss_release_msg(gss_msg); out: - dprintk("RPC: %5u %s for uid %u result %d\n", - task->tk_pid, __func__, - from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); + trace_rpcgss_upcall_result(from_kuid(&init_user_ns, + cred->cr_cred->fsuid), err); return err; } @@ -652,14 +648,13 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) DEFINE_WAIT(wait); int err; - dprintk("RPC: %s for uid %u\n", - __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid)); retry: err = 0; /* if gssd is down, just skip upcalling altogether */ if (!gssd_running(net)) { warn_gssd(); - return -EACCES; + err = -EACCES; + goto out; } gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { @@ -700,8 +695,8 @@ out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); out: - dprintk("RPC: %s for uid %u result %d\n", - __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); + trace_rpcgss_upcall_result(from_kuid(&init_user_ns, + cred->cr_cred->fsuid), err); return err; } @@ -794,7 +789,6 @@ err_put_ctx: err: kfree(buf); out: - dprintk("RPC: %s returning %zd\n", __func__, err); return err; } @@ -863,8 +857,6 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); if (msg->errno < 0) { - dprintk("RPC: %s releasing msg %p\n", - __func__, gss_msg); refcount_inc(&gss_msg->count); gss_unhash_msg(gss_msg); if (msg->errno == -ETIMEDOUT) @@ -1024,8 +1016,6 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) struct rpc_auth * auth; int err = -ENOMEM; /* XXX? */ - dprintk("RPC: creating GSS authenticator for client %p\n", clnt); - if (!try_module_get(THIS_MODULE)) return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) @@ -1041,10 +1031,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) gss_auth->net = get_net(rpc_net_ns(clnt)); err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); - if (!gss_auth->mech) { - dprintk("RPC: Pseudoflavor %d not found!\n", flavor); + if (!gss_auth->mech) goto err_put_net; - } gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; @@ -1099,6 +1087,7 @@ err_free: kfree(gss_auth); out_dec: module_put(THIS_MODULE); + trace_rpcgss_createauth(flavor, err); return ERR_PTR(err); } @@ -1135,9 +1124,6 @@ gss_destroy(struct rpc_auth *auth) struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); - dprintk("RPC: destroying GSS authenticator %p flavor %d\n", - auth, auth->au_flavor); - if (hash_hashed(&gss_auth->hash)) { spin_lock(&gss_auth_hash_lock); hash_del(&gss_auth->hash); @@ -1300,8 +1286,6 @@ gss_send_destroy_context(struct rpc_cred *cred) static void gss_do_free_ctx(struct gss_cl_ctx *ctx) { - dprintk("RPC: %s\n", __func__); - gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); kfree(ctx->gc_acceptor.data); @@ -1324,7 +1308,6 @@ gss_free_ctx(struct gss_cl_ctx *ctx) static void gss_free_cred(struct gss_cred *gss_cred) { - dprintk("RPC: %s cred=%p\n", __func__, gss_cred); kfree(gss_cred); } @@ -1381,10 +1364,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t struct gss_cred *cred = NULL; int err = -ENOMEM; - dprintk("RPC: %s for uid %d, flavor %d\n", - __func__, from_kuid(&init_user_ns, acred->cred->fsuid), - auth->au_flavor); - if (!(cred = kzalloc(sizeof(*cred), gfp))) goto out_err; @@ -1400,7 +1379,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t return &cred->gc_base; out_err: - dprintk("RPC: %s failed with error %d\n", __func__, err); return ERR_PTR(err); } @@ -1544,15 +1522,14 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr) struct xdr_netobj mic; struct kvec iov; struct xdr_buf verf_buf; - - dprintk("RPC: %5u %s\n", task->tk_pid, __func__); + int status; /* Credential */ p = xdr_reserve_space(xdr, 7 * sizeof(*p) + ctx->gc_wire_ctx.len); if (!p) - goto out_put_ctx; + goto marshal_failed; *p++ = rpc_auth_gss; cred_len = p++; @@ -1560,7 +1537,8 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr) req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ; spin_unlock(&ctx->gc_seq_lock); if (req->rq_seqno == MAXSEQ) - goto out_expired; + goto expired; + trace_rpcgss_seqno(task); *p++ = cpu_to_be32(RPC_GSS_VERSION); *p++ = cpu_to_be32(ctx->gc_proc); @@ -1579,25 +1557,31 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr) p = xdr_reserve_space(xdr, sizeof(*p)); if (!p) - goto out_put_ctx; + goto marshal_failed; *p++ = rpc_auth_gss; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - goto out_expired; + goto expired; else if (maj_stat != 0) - goto out_put_ctx; + goto bad_mic; if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) - goto out_put_ctx; - gss_put_ctx(ctx); - return 0; -out_expired: + goto marshal_failed; + status = 0; +out: gss_put_ctx(ctx); + return status; +expired: clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - return -EKEYEXPIRED; -out_put_ctx: - gss_put_ctx(ctx); - return -EMSGSIZE; + status = -EKEYEXPIRED; + goto out; +marshal_failed: + status = -EMSGSIZE; + goto out; +bad_mic: + trace_rpcgss_get_mic(task, maj_stat); + status = -EIO; + goto out; } static int gss_renew_cred(struct rpc_task *task) @@ -1723,8 +1707,7 @@ validate_failed: status = -EIO; goto out; bad_mic: - dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n", - task->tk_pid, __func__, maj_stat); + trace_rpcgss_verify_mic(task, maj_stat); status = -EACCES; goto out; } @@ -1761,13 +1744,16 @@ static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) - goto wrap_failed; + goto bad_mic; /* Check that the trailing MIC fit in the buffer, after the fact */ if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) goto wrap_failed; return 0; wrap_failed: return -EMSGSIZE; +bad_mic: + trace_rpcgss_get_mic(task, maj_stat); + return -EIO; } static void @@ -1860,7 +1846,6 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); snd_buf->tail[0].iov_base = tmp; } - status = -EIO; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ @@ -1871,7 +1856,7 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) - goto wrap_failed; + goto bad_wrap; *opaque_len = cpu_to_be32(snd_buf->len - offset); /* guess whether the pad goes into the head or the tail: */ @@ -1888,6 +1873,9 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, return 0; wrap_failed: return status; +bad_wrap: + trace_rpcgss_wrap(task, maj_stat); + return -EIO; } static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) @@ -1898,7 +1886,6 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status; - dprintk("RPC: %5u %s\n", task->tk_pid, __func__); status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) { /* The spec seems a little ambiguous here, but I think that not @@ -1917,10 +1904,11 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) case RPC_GSS_SVC_PRIVACY: status = gss_wrap_req_priv(cred, ctx, task, xdr); break; + default: + status = -EIO; } out: gss_put_ctx(ctx); - dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status); return status; } @@ -1932,8 +1920,9 @@ gss_unwrap_resp_auth(struct rpc_cred *cred) } static int -gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, struct xdr_stream *xdr) +gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, + struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, + struct xdr_stream *xdr) { struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf; u32 data_offset, mic_offset, integ_len, maj_stat; @@ -1951,7 +1940,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, if (mic_offset > rcv_buf->len) goto unwrap_failed; if (be32_to_cpup(p) != rqstp->rq_seqno) - goto unwrap_failed; + goto bad_seqno; if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len)) goto unwrap_failed; @@ -1967,16 +1956,20 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, 1 + XDR_QUADLEN(mic.len); return 0; unwrap_failed: + trace_rpcgss_unwrap_failed(task); + return -EIO; +bad_seqno: + trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p)); return -EIO; bad_mic: - dprintk("RPC: %s: gss_verify_mic returned error 0x%08x\n", - __func__, maj_stat); + trace_rpcgss_verify_mic(task, maj_stat); return -EIO; } static int -gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, struct xdr_stream *xdr) +gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, + struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, + struct xdr_stream *xdr) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; @@ -2000,7 +1993,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, goto bad_unwrap; /* gss_unwrap decrypted the sequence number */ if (be32_to_cpup(p++) != rqstp->rq_seqno) - goto unwrap_failed; + goto bad_seqno; /* gss_unwrap redacts the opaque blob from the head iovec. * rcv_buf has changed, thus the stream needs to be reset. @@ -2011,10 +2004,13 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, XDR_QUADLEN(savedlen - rcv_buf->len); return 0; unwrap_failed: + trace_rpcgss_unwrap_failed(task); + return -EIO; +bad_seqno: + trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p)); return -EIO; bad_unwrap: - dprintk("RPC: %s: gss_unwrap returned error 0x%08x\n", - __func__, maj_stat); + trace_rpcgss_unwrap(task, maj_stat); return -EIO; } @@ -2030,14 +2026,14 @@ gss_xmit_need_reencode(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 win, seq_xmit; + u32 win, seq_xmit = 0; bool ret = true; if (!ctx) - return true; + goto out; if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq))) - goto out; + goto out_ctx; seq_xmit = READ_ONCE(ctx->gc_seq_xmit); while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) { @@ -2046,15 +2042,18 @@ gss_xmit_need_reencode(struct rpc_task *task) seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno); if (seq_xmit == tmp) { ret = false; - goto out; + goto out_ctx; } } win = ctx->gc_win; if (win > 0) ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win); -out: + +out_ctx: gss_put_ctx(ctx); +out: + trace_rpcgss_need_reencode(task, seq_xmit, ret); return ret; } @@ -2075,10 +2074,10 @@ gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) status = gss_unwrap_resp_auth(cred); break; case RPC_GSS_SVC_INTEGRITY: - status = gss_unwrap_resp_integ(cred, ctx, rqstp, xdr); + status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr); break; case RPC_GSS_SVC_PRIVACY: - status = gss_unwrap_resp_priv(cred, ctx, rqstp, xdr); + status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr); break; } if (status) @@ -2088,8 +2087,6 @@ out_decode: status = rpcauth_unwrap_resp_decode(task, xdr); out: gss_put_ctx(ctx); - dprintk("RPC: %5u %s returning %d\n", - task->tk_pid, __func__, status); return status; } diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c new file mode 100644 index 000000000000..5576f1e66de9 --- /dev/null +++ b/net/sunrpc/auth_gss/trace.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018, 2019 Oracle. All rights reserved. + */ + +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f1ec2110efeb..bc7489f1fe55 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1165,6 +1165,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); + trace_xprt_enq_xmit(task, 1); goto out; } } else if (RPC_IS_SWAPPER(task)) { @@ -1176,6 +1177,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); + trace_xprt_enq_xmit(task, 2); goto out; } } else if (!req->rq_seqno) { @@ -1184,11 +1186,13 @@ xprt_request_enqueue_transmit(struct rpc_task *task) continue; list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); INIT_LIST_HEAD(&req->rq_xmit); + trace_xprt_enq_xmit(task, 3); goto out; } } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); INIT_LIST_HEAD(&req->rq_xmit2); + trace_xprt_enq_xmit(task, 4); out: set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -1313,8 +1317,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) int is_retrans = RPC_WAS_SENT(task); int status; - dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - if (!req->rq_bytes_sent) { if (xprt_request_data_received(task)) { status = 0; @@ -1336,9 +1338,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) connect_cookie = xprt->connect_cookie; status = xprt->ops->send_request(req); - trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { req->rq_ntrans--; + trace_xprt_transmit(req, status); return status; } @@ -1347,7 +1349,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) xprt_inject_disconnect(xprt); - dprintk("RPC: %5u xmit complete\n", task->tk_pid); task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); @@ -1360,6 +1361,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) req->rq_connect_cookie = connect_cookie; out_dequeue: + trace_xprt_transmit(req, status); xprt_request_dequeue_transmit(task); rpc_wake_up_queued_task_set_status(&xprt->sending, task, status); return status; -- cgit v1.2.3 From 241b1f419f0ea9966d574d7cc67377c74982a125 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:09 -0500 Subject: SUNRPC: Remove xdr_buf_trim() The key action of xdr_buf_trim() is that it shortens buf->len, the length of the xdr_buf's content. The other actions -- shortening the head, pages, and tail components -- are actually not necessary. In particular, changing the size of those components can corrupt the RPC message contained in the buffer. This is an accident waiting to happen rather than a current bug, as far as we know. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 - net/sunrpc/auth_gss/gss_krb5_wrap.c | 8 +++++--- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/xdr.c | 41 ------------------------------------- 4 files changed, 6 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 65af6a204b75..9ee3970ba59c 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -179,7 +179,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); -extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 5cdde6cb703a..14a0aff0cd84 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -570,14 +570,16 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) */ movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len); movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; - BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > - buf->head[0].iov_len); + if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > + buf->head[0].iov_len) + return GSS_S_FAILURE; memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; /* Trim off the trailing "extra count" and checksum blob */ - xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip); + buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; + return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 152790ed309c..f1aabab4a4c2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -896,7 +896,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g if (svc_getnl(&buf->head[0]) != seq) goto out; /* trim off the mic and padding at the end before returning */ - xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); + buf->len -= 4 + round_up_to_quad(mic.len); stat = 0; out: kfree(mic.data); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 5f0aa53fa4ae..4bce61978062 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1139,47 +1139,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); -/** - * xdr_buf_trim - lop at most "len" bytes off the end of "buf" - * @buf: buf to be trimmed - * @len: number of bytes to reduce "buf" by - * - * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note - * that it's possible that we'll trim less than that amount if the xdr_buf is - * too small, or if (for instance) it's all in the head and the parser has - * already read too far into it. - */ -void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) -{ - size_t cur; - unsigned int trim = len; - - if (buf->tail[0].iov_len) { - cur = min_t(size_t, buf->tail[0].iov_len, trim); - buf->tail[0].iov_len -= cur; - trim -= cur; - if (!trim) - goto fix_len; - } - - if (buf->page_len) { - cur = min_t(unsigned int, buf->page_len, trim); - buf->page_len -= cur; - trim -= cur; - if (!trim) - goto fix_len; - } - - if (buf->head[0].iov_len) { - cur = min_t(size_t, buf->head[0].iov_len, trim); - buf->head[0].iov_len -= cur; - trim -= cur; - } -fix_len: - buf->len -= (len - trim); -} -EXPORT_SYMBOL_GPL(xdr_buf_trim); - static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; -- cgit v1.2.3 From 2573a46499956c2ff311e1a65052364243161e78 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:15 -0500 Subject: SUNRPC: Add SPDX IDs to some net/sunrpc/auth_gss/ files Files under net/sunrpc/auth_gss/ do not yet have SPDX ID tags. This directory is somewhat complicated because most of these files have license boilerplate that is not strictly GPL 2.0. In this patch I add ID tags where there is an obvious match. The less recognizable licenses are still under research. For reference, SPDX IDs added in this patch correspond to the following license text: GPL-2.0 https://spdx.org/licenses/GPL-2.0.html GPL-2.0+ https://spdx.org/licenses/GPL-2.0+.html BSD-3-Clause https://spdx.org/licenses/BSD-3-Clause.html Cc: Simo Sorce Cc: Kate Stewart Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 27 +-------------------------- net/sunrpc/auth_gss/gss_krb5_mech.c | 27 +-------------------------- net/sunrpc/auth_gss/gss_mech_switch.c | 27 +-------------------------- net/sunrpc/auth_gss/gss_rpc_upcall.c | 15 +-------------- net/sunrpc/auth_gss/gss_rpc_upcall.h | 16 ++-------------- net/sunrpc/auth_gss/gss_rpc_xdr.c | 15 +-------------- net/sunrpc/auth_gss/gss_rpc_xdr.h | 17 +---------------- net/sunrpc/auth_gss/svcauth_gss.c | 1 + 8 files changed, 9 insertions(+), 136 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3d1fbd6d3370..fda454c9b594 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/auth_gss/auth_gss.c * @@ -8,34 +9,8 @@ * * Dug Song * Andy Adamson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - #include #include #include diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index be31a58d54e0..56cc85c5bc06 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/gss_krb5_mech.c * @@ -6,32 +7,6 @@ * * Andy Adamson * J. Bruce Fields - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 379318dff534..82060099a429 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/gss_mech_switch.c * @@ -5,32 +6,6 @@ * All rights reserved. * * J. Bruce Fields - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 73dcda060335..0349f455a862 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/net/sunrpc/gss_rpc_upcall.c * * Copyright (C) 2012 Simo Sorce - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h index 1e542aded90a..31e96344167e 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.h +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * linux/net/sunrpc/gss_rpc_upcall.h * * Copyright (C) 2012 Simo Sorce - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _GSS_RPC_UPCALL_H @@ -45,4 +32,5 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data); void init_gssp_clnt(struct sunrpc_net *); int set_gssp_clnt(struct net *); void clear_gssp_clnt(struct sunrpc_net *); + #endif /* _GSS_RPC_UPCALL_H */ diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 006062ad5f58..2ff7b7083eba 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * GSS Proxy upcall module * * Copyright (C) 2012 Simo Sorce - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h index 146c31032917..3f17411b7e65 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.h +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * GSS Proxy upcall module * * Copyright (C) 2012 Simo Sorce - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _LINUX_GSS_RPC_XDR_H @@ -262,6 +249,4 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, #define GSSX_ARG_wrap_size_limit_sz 0 #define GSSX_RES_wrap_size_limit_sz 0 - - #endif /* _LINUX_GSS_RPC_XDR_H */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index f1aabab4a4c2..0c5d7896d6dd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Neil Brown * J. Bruce Fields -- cgit v1.2.3 From cf500bac8fd48b57f38ece890235923d4ed5ee91 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:20 -0500 Subject: SUNRPC: Introduce rpc_prepare_reply_pages() prepare_reply_buffer() and its NFSv4 equivalents expose the details of the RPC header and the auth slack values to upper layer consumers, creating a layering violation, and duplicating code. Remedy these issues by adding a new RPC client API that hides those details from upper layers in a common helper function. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs2xdr.c | 27 +++++------------------ fs/nfs/nfs3xdr.c | 29 ++++++------------------ fs/nfs/nfs4xdr.c | 51 ++++++++++++++++++------------------------- include/linux/sunrpc/clnt.h | 3 +++ include/trace/events/sunrpc.h | 37 +++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 19 ++++++++++++++++ net/sunrpc/xdr.c | 9 ++++++++ 7 files changed, 102 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index bac3a4e2cb5d..1dcd0feda32d 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -65,21 +65,6 @@ static int nfs_stat_to_errno(enum nfs_stat); -/* - * While encoding arguments, set up the reply buffer in advance to - * receive reply data directly into the page cache. - */ -static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages, - unsigned int base, unsigned int len, - unsigned int bufsize) -{ - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - - replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize; - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len); -} - /* * Encode/decode NFSv2 basic data types * @@ -593,8 +578,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, const struct nfs_readlinkargs *args = data; encode_fhandle(xdr, args->fh); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->pglen, NFS_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->pglen, NFS_readlinkres_sz); } /* @@ -629,8 +614,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, const struct nfs_pgio_args *args = data; encode_readargs(xdr, args); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->count, NFS_readres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, NFS_readres_sz); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -787,8 +772,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req, const struct nfs_readdirargs *args = data; encode_readdirargs(xdr, args); - prepare_reply_buffer(req, args->pages, 0, - args->count, NFS_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, + args->count, NFS_readdirres_sz); } /* diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 4aa3ffe1800e..a54dcf4bfb1d 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -104,21 +104,6 @@ static const umode_t nfs_type2fmt[] = { [NF3FIFO] = S_IFIFO, }; -/* - * While encoding arguments, set up the reply buffer in advance to - * receive reply data directly into the page cache. - */ -static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages, - unsigned int base, unsigned int len, - unsigned int bufsize) -{ - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - - replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize; - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len); -} - /* * Encode/decode NFSv3 basic data types * @@ -910,8 +895,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, const struct nfs3_readlinkargs *args = data; encode_nfs_fh3(xdr, args->fh); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->pglen, NFS3_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->pglen, NFS3_readlinkres_sz); } /* @@ -943,8 +928,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, unsigned int replen = args->replen ? args->replen : NFS3_readres_sz; encode_read3args(xdr, args); - prepare_reply_buffer(req, args->pages, args->pgbase, - args->count, replen); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, replen); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -1236,7 +1221,7 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdir3args(xdr, args); - prepare_reply_buffer(req, args->pages, 0, + rpc_prepare_reply_pages(req, args->pages, 0, args->count, NFS3_readdirres_sz); } @@ -1278,7 +1263,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdirplus3args(xdr, args); - prepare_reply_buffer(req, args->pages, 0, + rpc_prepare_reply_pages(req, args->pages, 0, args->count, NFS3_readdirres_sz); } @@ -1323,7 +1308,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, encode_nfs_fh3(xdr, args->fh); encode_uint32(xdr, args->mask); if (args->mask & (NFS_ACL | NFS_DFACL)) { - prepare_reply_buffer(req, args->pages, 0, + rpc_prepare_reply_pages(req, args->pages, 0, NFSACL_MAXPAGES << PAGE_SHIFT, ACL3_getaclres_sz); req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 38a4cbc18657..d0fa18df32ea 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1016,12 +1016,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; - struct rpc_auth *auth = req->rq_cred->cr_auth; /* initialize running count of expected bytes in reply. * NOTE: the replied tag SHOULD be the same is the one sent, * but this is not required as a MUST for the server to do so. */ - hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; + hdr->replen = 3 + hdr->taglen; WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN); encode_string(xdr, hdr->taglen, hdr->tag); @@ -2341,9 +2340,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); if (args->lg_args) { encode_layoutget(xdr, args->lg_args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->lg_args->layout.pages, - 0, args->lg_args->layout.pglen); + rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, + args->lg_args->layout.pglen, + hdr.replen); } encode_nops(&hdr); } @@ -2387,9 +2386,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); if (args->lg_args) { encode_layoutget(xdr, args->lg_args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->lg_args->layout.pages, - 0, args->lg_args->layout.pglen); + rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, + args->lg_args->layout.pglen, + hdr.replen); } encode_nops(&hdr); } @@ -2499,8 +2498,8 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_readlink(xdr, args, req, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, - args->pgbase, args->pglen); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->pglen, hdr.replen); encode_nops(&hdr); } @@ -2520,11 +2519,8 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_readdir(xdr, args, req, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, - args->pgbase, args->count); - dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", - __func__, hdr.replen << 2, args->pages, - args->pgbase, args->count); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, hdr.replen); encode_nops(&hdr); } @@ -2544,8 +2540,8 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_read(xdr, args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->pages, args->pgbase, args->count); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, hdr.replen); req->rq_rcv_buf.flags |= XDRBUF_READ; encode_nops(&hdr); } @@ -2591,9 +2587,8 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr(xdr, nfs4_acl_bitmap, NULL, ARRAY_SIZE(nfs4_acl_bitmap), &hdr); - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, 0, args->acl_len); - + rpc_prepare_reply_pages(req, args->acl_pages, 0, + args->acl_len, replen); encode_nops(&hdr); } @@ -2814,9 +2809,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, encode_fs_locations(xdr, args->bitmask, &hdr); } - /* Set up reply kvec to capture returned fs_locations array. */ - xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - (struct page **)&args->page, 0, PAGE_SIZE); + rpc_prepare_reply_pages(req, (struct page **)&args->page, 0, + PAGE_SIZE, replen); encode_nops(&hdr); } @@ -3018,10 +3012,8 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, /* set up reply kvec. Subtract notification bitmap max size (2) * so that notification bitmap is put in xdr_buf tail */ - xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2, - args->pdev->pages, args->pdev->pgbase, - args->pdev->pglen); - + rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase, + args->pdev->pglen, hdr.replen - 2); encode_nops(&hdr); } @@ -3042,9 +3034,8 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, encode_putfh(xdr, NFS_FH(args->inode), &hdr); encode_layoutget(xdr, args, &hdr); - xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, - args->layout.pages, 0, args->layout.pglen); - + rpc_prepare_reply_pages(req, args->layout.pages, 0, + args->layout.pglen, hdr.replen); encode_nops(&hdr); } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 1c441714d569..98bc9883b230 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -169,6 +169,9 @@ int rpcb_v4_register(struct net *net, const u32 program, const char *netid); void rpcb_getport_async(struct rpc_task *); +void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, + unsigned int base, unsigned int len, + unsigned int hdrsize); void rpc_call_start(struct rpc_task *); int rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index e58dda8e038c..8451f30c6a0f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -461,6 +461,43 @@ TRACE_EVENT(rpc_xdr_alignment, ) ); +TRACE_EVENT(rpc_reply_pages, + TP_PROTO( + const struct rpc_rqst *req + ), + + TP_ARGS(req), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + ), + + TP_fast_assign( + __entry->task_id = req->rq_task->tk_pid; + __entry->client_id = req->rq_task->tk_client->cl_clid; + + __entry->head_base = req->rq_rcv_buf.head[0].iov_base; + __entry->head_len = req->rq_rcv_buf.head[0].iov_len; + __entry->page_len = req->rq_rcv_buf.page_len; + __entry->tail_base = req->rq_rcv_buf.tail[0].iov_base; + __entry->tail_len = req->rq_rcv_buf.tail[0].iov_len; + ), + + TP_printk( + "task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n", + __entry->task_id, __entry->client_id, + __entry->head_base, __entry->head_len, + __entry->page_len, + __entry->tail_base, __entry->tail_len + ) +); + /* * First define the enums in the below macros to be exported to userspace * via TRACE_DEFINE_ENUM(). diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 803e93105af1..f780605fffe0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1164,6 +1164,25 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +/** + * rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages + * @req: RPC request to prepare + * @pages: vector of struct page pointers + * @base: offset in first page where receive should start, in bytes + * @len: expected size of the upper layer data payload, in bytes + * @hdrsize: expected size of upper layer reply header, in XDR words + * + */ +void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, + unsigned int base, unsigned int len, + unsigned int hdrsize) +{ + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack; + xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len); + trace_rpc_reply_pages(req); +} +EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages); + void rpc_call_start(struct rpc_task *task) { diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 4bce61978062..7cca51560442 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -163,6 +163,15 @@ xdr_free_bvec(struct xdr_buf *buf) buf->bvec = NULL; } +/** + * xdr_inline_pages - Prepare receive buffer for a large reply + * @xdr: xdr_buf into which reply will be placed + * @offset: expected offset where data payload will start, in bytes + * @pages: vector of struct page pointers + * @base: offset in first page where receive should start, in bytes + * @len: expected size of the upper layer data payload, in bytes + * + */ void xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, struct page **pages, unsigned int base, unsigned int len) -- cgit v1.2.3 From 02ef04e432babf8fc703104212314e54112ecd2d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:25 -0500 Subject: NFS: Account for XDR pad of buf->pages Certain NFS results (eg. READLINK) might expect a data payload that is not an exact multiple of 4 bytes. In this case, XDR encoding is required to pad that payload so its length on the wire is a multiple of 4 bytes. The constants that define the maximum size of each NFS result do not appear to account for this extra word. In each case where the data payload is to be received into pages: - 1 word is added to the size of the receive buffer allocated by call_allocate - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the extra buffer space falls into the rcv_buf's tail iovec - If buf->pagelen is word-aligned, an XDR pad is not needed and is thus removed from the tail Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs2xdr.c | 6 +++--- fs/nfs/nfs3xdr.c | 10 +++++----- fs/nfs/nfs4xdr.c | 15 ++++++++------- net/sunrpc/clnt.c | 6 +++++- net/sunrpc/xdr.c | 2 ++ 5 files changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1dcd0feda32d..a7ed29de0a40 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -56,11 +56,11 @@ #define NFS_attrstat_sz (1+NFS_fattr_sz) #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz) -#define NFS_readlinkres_sz (2) -#define NFS_readres_sz (1+NFS_fattr_sz+1) +#define NFS_readlinkres_sz (2+1) +#define NFS_readres_sz (1+NFS_fattr_sz+1+1) #define NFS_writeres_sz (NFS_attrstat_sz) #define NFS_stat_sz (1) -#define NFS_readdirres_sz (1) +#define NFS_readdirres_sz (1+1) #define NFS_statfsres_sz (1+NFS_info_sz) static int nfs_stat_to_errno(enum nfs_stat); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a54dcf4bfb1d..110358f4986d 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -69,13 +69,13 @@ #define NFS3_removeres_sz (NFS3_setattrres_sz) #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1) -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3) +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1) +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1) #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2) +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1) #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13) #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12) #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) @@ -85,7 +85,7 @@ #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \ - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1) #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) static int nfs3_stat_to_errno(enum nfs_stat); @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, result->op_status = status; if (status != NFS3_OK) goto out_status; - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2); + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2); error = decode_read3resok(xdr, result); out: return error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d0fa18df32ea..6d9d5e2f6308 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, nfs4_fattr_bitmap_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) -#define decode_read_maxsz (op_decode_hdr_maxsz + 2) +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1) #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ 2 + encode_verifier_maxsz + 5 + \ nfs4_label_maxsz) #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ - decode_verifier_maxsz) + decode_verifier_maxsz + 1) #define encode_readlink_maxsz (op_encode_hdr_maxsz) -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1) #define encode_write_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 4) #define decode_write_maxsz (op_decode_hdr_maxsz + \ @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) #define encode_getacl_maxsz (encode_getattr_maxsz) #define decode_getacl_maxsz (op_decode_hdr_maxsz + \ - nfs4_fattr_bitmap_maxsz + 1) + nfs4_fattr_bitmap_maxsz + 1 + 1) #define encode_setacl_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_setacl_maxsz (decode_setattr_maxsz) #define encode_fs_locations_maxsz \ (encode_getattr_maxsz) #define decode_fs_locations_maxsz \ - (0) + (1) #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4)) @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, 1 /* opaque devaddr4 length */ + \ /* devaddr4 payload is read into page */ \ 1 /* notification bitmap length */ + \ - 1 /* notification bitmap, word 0 */) + 1 /* notification bitmap, word 0 */ + \ + 1 /* possible XDR padding */) #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ encode_stateid_maxsz) #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ decode_stateid_maxsz + \ - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1) #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \ 2 /* offset */ + \ 2 /* length */ + \ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f780605fffe0..4ea38b029e2f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, unsigned int base, unsigned int len, unsigned int hdrsize) { - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack; + /* Subtract one to force an extra word of buffer space for the + * payload's XDR pad to fall into the rcv_buf's tail iovec. + */ + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1; + xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len); trace_rpc_reply_pages(req); } diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 7cca51560442..aa8177ddcbda 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -189,6 +189,8 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, tail->iov_base = buf + offset; tail->iov_len = buflen - offset; + if ((xdr->page_len & 3) == 0) + tail->iov_len -= sizeof(__be32); xdr->buflen += len; } -- cgit v1.2.3 From a00275baa68e1ee226cc659f54dc3a571f3ad600 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:31 -0500 Subject: SUNRPC: Make AUTH_SYS and AUTH_NULL set au_verfsize au_verfsize will be needed for a non-flavor-specific computation in a subsequent patch. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 3 +-- net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/auth_null.c | 1 + net/sunrpc/auth_unix.c | 5 ++++- 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index c51e1893f77e..359dfdd04e77 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -77,8 +77,7 @@ struct rpc_auth { /* guess at number of u32's auth adds before * reply data; normally the verifier size: */ unsigned int au_rslack; - /* for gss, used to calculate au_rslack: */ - unsigned int au_verfsize; + unsigned int au_verfsize; /* size of reply verifier */ unsigned int au_flags; /* various flags */ const struct rpc_authops *au_ops; /* operations */ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fda454c9b594..731e7a482e18 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1016,6 +1016,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; + auth->au_verfsize = GSS_VERF_SLACK >> 2; auth->au_flags = 0; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index bf96975ffc4b..9ae08248a9e1 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -114,6 +114,7 @@ static struct rpc_auth null_auth = { .au_cslack = NUL_CALLSLACK, .au_rslack = NUL_REPLYSLACK, + .au_verfsize = NUL_REPLYSLACK, .au_ops = &authnull_ops, .au_flavor = RPC_AUTH_NULL, .au_count = REFCOUNT_INIT(1), diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 5ea84a96f96e..a93c56442487 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -163,6 +163,7 @@ unx_refresh(struct rpc_task *task) static int unx_validate(struct rpc_task *task, struct xdr_stream *xdr) { + struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth; __be32 *p; u32 size; @@ -184,7 +185,8 @@ unx_validate(struct rpc_task *task, struct xdr_stream *xdr) if (!p) return -EIO; - task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; + auth->au_verfsize = XDR_QUADLEN(size) + 2; + auth->au_rslack = XDR_QUADLEN(size) + 2; return 0; } @@ -212,6 +214,7 @@ static struct rpc_auth unix_auth = { .au_cslack = UNX_CALLSLACK, .au_rslack = NUL_REPLYSLACK, + .au_verfsize = NUL_REPLYSLACK, .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = REFCOUNT_INIT(1), -- cgit v1.2.3 From 35e77d21baa04b554bf3dc9a08dfa7e569286e51 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:36 -0500 Subject: SUNRPC: Add rpc_auth::au_ralign field Currently rpc_inline_rcv_pages() uses au_rslack to estimate the size of the upper layer reply header. This is fine for auth flavors where au_verfsize == au_rslack. However, some auth flavors have more going on. krb5i for example has two more words after the verifier, and another blob following the RPC message. The calculation involving au_rslack pushes the upper layer reply header too far into the rcv_buf. au_rslack is still valuable: it's the amount of buffer space needed for the reply, and is used when allocating the reply buffer. We'll keep that. But, add a new field that can be used to properly estimate the location of the upper layer header in each RPC reply, based on the auth flavor in use. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 9 ++++----- net/sunrpc/auth_gss/auth_gss.c | 18 +++++++++++++----- net/sunrpc/auth_null.c | 1 + net/sunrpc/auth_unix.c | 1 + net/sunrpc/clnt.c | 2 +- 5 files changed, 20 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 359dfdd04e77..5f9076fdb090 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -74,13 +74,12 @@ struct rpc_cred_cache; struct rpc_authops; struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ - /* guess at number of u32's auth adds before - * reply data; normally the verifier size: */ - unsigned int au_rslack; + unsigned int au_rslack; /* reply cred size estimate */ unsigned int au_verfsize; /* size of reply verifier */ + unsigned int au_ralign; /* words before UL header */ - unsigned int au_flags; /* various flags */ - const struct rpc_authops *au_ops; /* operations */ + unsigned int au_flags; + const struct rpc_authops *au_ops; rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in * au_ops->au_flavor in gss diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 731e7a482e18..c67e2ad151ae 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1017,6 +1017,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; auth->au_verfsize = GSS_VERF_SLACK >> 2; + auth->au_ralign = GSS_VERF_SLACK >> 2; auth->au_flags = 0; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; @@ -1891,7 +1892,10 @@ out: static int gss_unwrap_resp_auth(struct rpc_cred *cred) { - cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize; + struct rpc_auth *auth = cred->cr_auth; + + auth->au_rslack = auth->au_verfsize; + auth->au_ralign = auth->au_verfsize; return 0; } @@ -1902,6 +1906,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, { struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf; u32 data_offset, mic_offset, integ_len, maj_stat; + struct rpc_auth *auth = cred->cr_auth; struct xdr_netobj mic; __be32 *p; @@ -1928,8 +1933,8 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, if (maj_stat != GSS_S_COMPLETE) goto bad_mic; - cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 + - 1 + XDR_QUADLEN(mic.len); + auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len); + auth->au_ralign = auth->au_verfsize + 2; return 0; unwrap_failed: trace_rpcgss_unwrap_failed(task); @@ -1949,6 +1954,7 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; + struct rpc_auth *auth = cred->cr_auth; unsigned int savedlen = rcv_buf->len; u32 offset, opaque_len, maj_stat; __be32 *p; @@ -1976,8 +1982,10 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, */ xdr_init_decode(xdr, rcv_buf, p, rqstp); - cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 + - XDR_QUADLEN(savedlen - rcv_buf->len); + auth->au_rslack = auth->au_verfsize + 2 + + XDR_QUADLEN(savedlen - rcv_buf->len); + auth->au_ralign = auth->au_verfsize + 2 + + XDR_QUADLEN(savedlen - rcv_buf->len); return 0; unwrap_failed: trace_rpcgss_unwrap_failed(task); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 9ae08248a9e1..41a633a4049e 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -115,6 +115,7 @@ struct rpc_auth null_auth = { .au_cslack = NUL_CALLSLACK, .au_rslack = NUL_REPLYSLACK, .au_verfsize = NUL_REPLYSLACK, + .au_ralign = NUL_REPLYSLACK, .au_ops = &authnull_ops, .au_flavor = RPC_AUTH_NULL, .au_count = REFCOUNT_INIT(1), diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index a93c56442487..c048eb6deaaf 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -187,6 +187,7 @@ unx_validate(struct rpc_task *task, struct xdr_stream *xdr) auth->au_verfsize = XDR_QUADLEN(size) + 2; auth->au_rslack = XDR_QUADLEN(size) + 2; + auth->au_ralign = XDR_QUADLEN(size) + 2; return 0; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4ea38b029e2f..99bfeb17367c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1180,7 +1180,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, /* Subtract one to force an extra word of buffer space for the * payload's XDR pad to fall into the rcv_buf's tail iovec. */ - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1; + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1; xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len); trace_rpc_reply_pages(req); -- cgit v1.2.3 From 2c94b8eca1a26cd46010d6e73a23da5f2e93a19d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2019 11:25:41 -0500 Subject: SUNRPC: Use au_rslack when computing reply buffer size au_rslack is significantly smaller than (au_cslack << 2). Using that value results in smaller receive buffers. In some cases this eliminates an extra segment in Reply chunks (RPC/RDMA). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 99bfeb17367c..241e8423fd0c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1690,7 +1690,7 @@ call_refreshresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack; + const struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -1715,9 +1715,10 @@ call_allocate(struct rpc_task *task) * and reply headers, and convert both values * to byte sizes. */ - req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen; + req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) + + proc->p_arglen; req->rq_callsize <<= 2; - req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen; + req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack + proc->p_replen; req->rq_rcvsize <<= 2; status = xprt->ops->buf_alloc(task); -- cgit v1.2.3 From a1231fda7e944adf37d8368b2e182041a39ea1ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Feb 2019 10:02:29 -0500 Subject: SUNRPC: Set memalloc_nofs_save() on all rpciod/xprtiod jobs Set memalloc_nofs_save() on all the rpciod/xprtiod jobs so that we ensure memory allocations for asynchronous rpc calls don't ever end up recursing back to the NFS layer for memory reclaim. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 7 +++++++ net/sunrpc/xprt.c | 3 +++ net/sunrpc/xprtsock.c | 10 +++++++--- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index adc3c40cc733..2168d4d9c09f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -902,7 +903,10 @@ void rpc_execute(struct rpc_task *task) static void rpc_async_schedule(struct work_struct *work) { + unsigned int pflags = memalloc_nofs_save(); + __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); + memalloc_nofs_restore(pflags); } /** @@ -1067,7 +1071,10 @@ static void rpc_free_task(struct rpc_task *task) static void rpc_async_release(struct work_struct *work) { + unsigned int pflags = memalloc_nofs_save(); + rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); + memalloc_nofs_restore(pflags); } static void rpc_release_resources_task(struct rpc_task *task) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f1ec2110efeb..b95f4452dbc6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -643,11 +644,13 @@ static void xprt_autoclose(struct work_struct *work) { struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); + unsigned int pflags = memalloc_nofs_save(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt->ops->close(xprt); xprt_release_write(xprt, NULL); wake_up_bit(&xprt->state, XPRT_LOCKED); + memalloc_nofs_restore(pflags); } /** diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7754aa3e434f..f5d7dcd9e8d9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -50,6 +50,7 @@ #include #include #include +#include #include @@ -680,7 +681,10 @@ static void xs_stream_data_receive_workfn(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, recv_worker); + unsigned int pflags = memalloc_nofs_save(); + xs_stream_data_receive(transport); + memalloc_nofs_restore(pflags); } static void @@ -1378,7 +1382,10 @@ static void xs_udp_data_receive_workfn(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, recv_worker); + unsigned int pflags = memalloc_nofs_save(); + xs_udp_data_receive(transport); + memalloc_nofs_restore(pflags); } /** @@ -1869,7 +1876,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_write_space = xs_udp_write_space; sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; - sk->sk_allocation = GFP_NOIO; xprt_clear_connected(xprt); @@ -2057,7 +2063,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; sock_set_flag(sk, SOCK_FASYNC); - sk->sk_allocation = GFP_NOIO; xprt_set_connected(xprt); @@ -2220,7 +2225,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_write_space = xs_tcp_write_space; sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; - sk->sk_allocation = GFP_NOIO; /* socket options */ sock_reset_flag(sk, SOCK_LINGER); -- cgit v1.2.3 From 0ffe86f48026b7f34db22d1004bc9992f0db8b33 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jan 2019 14:51:26 -0500 Subject: SUNRPC: Use poll() to fix up the socket requeue races Because we clear XPRT_SOCK_DATA_READY before reading, we can end up with a situation where new data arrives, causing xs_data_ready() to queue up a second receive worker job for the same socket, which then immediately gets stuck waiting on the transport receive mutex. The fix is to only clear XPRT_SOCK_DATA_READY once we're done reading, and then to use poll() to check if we might need to queue up a new job in order to deal with any new data. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f5d7dcd9e8d9..da45bb1e931e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -656,13 +656,34 @@ out_err: return ret != 0 ? ret : -ESHUTDOWN; } +static __poll_t xs_poll_socket(struct sock_xprt *transport) +{ + return transport->sock->ops->poll(NULL, transport->sock, NULL); +} + +static bool xs_poll_socket_readable(struct sock_xprt *transport) +{ + __poll_t events = xs_poll_socket(transport); + + return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP); +} + +static void xs_poll_check_readable(struct sock_xprt *transport) +{ + + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + if (!xs_poll_socket_readable(transport)) + return; + if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) + queue_work(xprtiod_workqueue, &transport->recv_worker); +} + static void xs_stream_data_receive(struct sock_xprt *transport) { size_t read = 0; ssize_t ret = 0; mutex_lock(&transport->recv_mutex); - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); if (transport->sock == NULL) goto out; for (;;) { @@ -672,6 +693,7 @@ static void xs_stream_data_receive(struct sock_xprt *transport) read += ret; cond_resched(); } + xs_poll_check_readable(transport); out: mutex_unlock(&transport->recv_mutex); trace_xs_stream_read_data(&transport->xprt, ret, read); @@ -1362,7 +1384,6 @@ static void xs_udp_data_receive(struct sock_xprt *transport) int err; mutex_lock(&transport->recv_mutex); - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); sk = transport->inet; if (sk == NULL) goto out; @@ -1374,6 +1395,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) consume_skb(skb); cond_resched(); } + xs_poll_check_readable(transport); out: mutex_unlock(&transport->recv_mutex); } -- cgit v1.2.3 From b9779a54bb224cd2a993c9bf8a1910a7494c062e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 2 Jan 2019 15:54:42 -0500 Subject: SUNRPC: Ensure rq_bytes_sent is reset before request transmission When we resend a request, ensure that the 'rq_bytes_sent' is reset to zero. Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 1 - net/sunrpc/xprt.c | 2 -- net/sunrpc/xprtrdma/backchannel.c | 1 - net/sunrpc/xprtrdma/transport.c | 1 - net/sunrpc/xprtsock.c | 2 -- 5 files changed, 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index ec451b8114b0..b9313c15ee3a 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -252,7 +252,6 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); req->rq_reply_bytes_recvd = 0; - req->rq_bytes_sent = 0; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); req->rq_xid = xid; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b95f4452dbc6..1587728f40d1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1602,7 +1602,6 @@ xprt_request_init(struct rpc_task *task) req->rq_buffer = NULL; req->rq_xid = xprt_alloc_xid(xprt); xprt_init_connect_cookie(req, xprt); - req->rq_bytes_sent = 0; req->rq_snd_buf.len = 0; req->rq_snd_buf.buflen = 0; req->rq_rcv_buf.len = 0; @@ -1752,7 +1751,6 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) */ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + xbufp->tail[0].iov_len; - req->rq_bytes_sent = 0; } #endif diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 0de9b3e63770..749452724e6e 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -267,7 +267,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, /* Prepare rqst */ rqst->rq_reply_bytes_recvd = 0; - rqst->rq_bytes_sent = 0; rqst->rq_xid = *p; rqst->rq_private_buf.len = size; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index fbc171ebfe91..3c490172d383 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -738,7 +738,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst) goto drop_connection; rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; - rqst->rq_bytes_sent = 0; /* An RPC with no reply will throw off credit accounting, * so drop the connection to reset the credit grant. diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index da45bb1e931e..5d22c175c7dd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -951,7 +951,6 @@ static int xs_local_send_request(struct rpc_rqst *req) req->rq_bytes_sent = transport->xmit.offset; if (likely(req->rq_bytes_sent >= req->rq_slen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; - req->rq_bytes_sent = 0; transport->xmit.offset = 0; return 0; } @@ -1117,7 +1116,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req) req->rq_bytes_sent = transport->xmit.offset; if (likely(req->rq_bytes_sent >= req->rq_slen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; - req->rq_bytes_sent = 0; transport->xmit.offset = 0; return 0; } -- cgit v1.2.3 From e3735c89985415f4fb509c67963e3d05969fbdb1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Jan 2019 17:53:52 +1100 Subject: SUNRPC: remove pointless test in unx_match() As reported by Dan Carpenter, this test for acred->cred being set is inconsistent with the dereference of the pointer a few lines earlier. An 'auth_cred' *always* has ->cred set - every place that creates one initializes this field, often as the first thing done. So remove this test. Reported-by: Dan Carpenter Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/auth_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 387f6b3ffbea..770e338a9b11 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -87,7 +87,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid)) return 0; - if (acred->cred && acred->cred->group_info != NULL) + if (acred->cred->group_info != NULL) groups = acred->cred->group_info->ngroups; if (groups > UNX_NGROUPS) groups = UNX_NGROUPS; -- cgit v1.2.3 From ae0535515161d3a131f2acabb7b883e3f3d45235 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Feb 2019 14:56:20 -0500 Subject: SUNRPC: Don't reset the stream record info when the receive worker is running To ensure that the receive worker has exclusive access to the stream record info, we must not reset the contents other than when holding the transport->recv_mutex. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5d22c175c7dd..ec0bb153c140 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -716,6 +716,11 @@ xs_stream_reset_connect(struct sock_xprt *transport) transport->recv.len = 0; transport->recv.copied = 0; transport->xmit.offset = 0; +} + +static void +xs_stream_start_connect(struct sock_xprt *transport) +{ transport->xprt.stat.connect_count++; transport->xprt.stat.connect_start = jiffies; } @@ -1255,6 +1260,8 @@ static void xs_reset_transport(struct sock_xprt *transport) xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); + /* Reset stream record info */ + xs_stream_reset_connect(transport); mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); @@ -1906,7 +1913,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, write_unlock_bh(&sk->sk_callback_lock); } - xs_stream_reset_connect(transport); + xs_stream_start_connect(transport); return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); } @@ -2264,8 +2271,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); - /* Reset TCP record info */ - xs_stream_reset_connect(transport); + xs_stream_start_connect(transport); /* Tell the socket layer to start connecting... */ set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); -- cgit v1.2.3 From e92053a52e68b841d1314ca04da7cbeb326ee1a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Feb 2019 18:14:27 -0500 Subject: SUNRPC: Handle zero length fragments correctly A zero length fragment is really a bug, but let's ensure we don't go nuts when one turns up. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ec0bb153c140..492cec3f1451 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -405,8 +405,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, size_t want, seek_init = seek, offset = 0; ssize_t ret; - if (seek < buf->head[0].iov_len) { - want = min_t(size_t, count, buf->head[0].iov_len); + want = min_t(size_t, count, buf->head[0].iov_len); + if (seek < want) { ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek); if (ret <= 0) goto sock_err; @@ -417,8 +417,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, goto out; seek = 0; } else { - seek -= buf->head[0].iov_len; - offset += buf->head[0].iov_len; + seek -= want; + offset += want; } want = xs_alloc_sparse_pages(buf, @@ -443,8 +443,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, offset += want; } - if (seek < buf->tail[0].iov_len) { - want = min_t(size_t, count - offset, buf->tail[0].iov_len); + want = min_t(size_t, count - offset, buf->tail[0].iov_len); + if (seek < want) { ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek); if (ret <= 0) goto sock_err; @@ -454,7 +454,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (ret != want) goto out; } else - offset += buf->tail[0].iov_len; + offset = seek_init; ret = -EMSGSIZE; out: *read = offset - seek_init; @@ -482,6 +482,14 @@ xs_read_stream_request_done(struct sock_xprt *transport) return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT); } +static void +xs_read_stream_check_eor(struct sock_xprt *transport, + struct msghdr *msg) +{ + if (xs_read_stream_request_done(transport)) + msg->msg_flags |= MSG_EOR; +} + static ssize_t xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, int flags, struct rpc_rqst *req) @@ -493,17 +501,24 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, xs_read_header(transport, buf); want = transport->recv.len - transport->recv.offset; - ret = xs_read_xdr_buf(transport->sock, msg, flags, buf, - transport->recv.copied + want, transport->recv.copied, - &read); - transport->recv.offset += read; - transport->recv.copied += read; + if (want != 0) { + ret = xs_read_xdr_buf(transport->sock, msg, flags, buf, + transport->recv.copied + want, + transport->recv.copied, + &read); + transport->recv.offset += read; + transport->recv.copied += read; + } else + read = 0; + if (transport->recv.offset == transport->recv.len) { - if (xs_read_stream_request_done(transport)) - msg->msg_flags |= MSG_EOR; + xs_read_stream_check_eor(transport, msg); return read; } + if (want == 0) + return 0; + switch (ret) { default: break; -- cgit v1.2.3 From 727fcc64a0f8d346bdd60a7c92e9e720228ce037 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Feb 2019 16:53:04 -0500 Subject: SUNRPC: Don't suppress socket errors when a message read completes If the message read completes, but the socket returned an error condition, we should ensure to propagate that error. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 492cec3f1451..e51716e88899 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -508,13 +508,10 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, &read); transport->recv.offset += read; transport->recv.copied += read; - } else - read = 0; + } - if (transport->recv.offset == transport->recv.len) { + if (transport->recv.offset == transport->recv.len) xs_read_stream_check_eor(transport, msg); - return read; - } if (want == 0) return 0; -- cgit v1.2.3 From 5f52a9d429b8ba8f9d669730adf16bc534eb74ea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Feb 2019 09:31:47 -0500 Subject: SUNRPC: Initiate a connection close on an ESHUTDOWN error in stream receive If the client stream receive code receives an ESHUTDOWN error either because the server closed the connection, or because it sent a callback which cannot be processed, then we should shut down the connection. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e51716e88899..a3aadc04808f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -705,7 +705,10 @@ static void xs_stream_data_receive(struct sock_xprt *transport) read += ret; cond_resched(); } - xs_poll_check_readable(transport); + if (ret == -ESHUTDOWN) + kernel_sock_shutdown(transport->sock, SHUT_RDWR); + else + xs_poll_check_readable(transport); out: mutex_unlock(&transport->recv_mutex); trace_xs_stream_read_data(&transport->xprt, ret, read); -- cgit v1.2.3 From e791f8e9380d945ee9ed75aba2d65a9369f01820 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2019 12:39:40 -0500 Subject: SUNRPC: Convert xs_send_kvec() to use iov_iter_kvec() Prepare to the socket transmission code to use iov_iter. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a3aadc04808f..e1546dd6aacc 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -742,21 +742,21 @@ xs_stream_start_connect(struct sock_xprt *transport) #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) -static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) +static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek) { - struct msghdr msg = { - .msg_name = addr, - .msg_namelen = addrlen, - .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0), - }; - struct kvec iov = { - .iov_base = vec->iov_base + base, - .iov_len = vec->iov_len - base, - }; + if (seek) + iov_iter_advance(&msg->msg_iter, seek); + return sock_sendmsg(sock, msg); +} - if (iov.iov_len != 0) - return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - return kernel_sendmsg(sock, &msg, NULL, 0, 0); +static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek) +{ + if (!vec) { + iov_iter_kvec(&msg->msg_iter, WRITE, NULL, 0, 0); + return sock_sendmsg(sock, msg); + } + iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len); + return xs_sendmsg(sock, msg, seek); } static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p) @@ -810,6 +810,11 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i */ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p) { + struct msghdr msg = { + .msg_name = addr, + .msg_namelen = addrlen, + .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE, + }; unsigned int remainder = xdr->len - base; int err = 0; int sent = 0; @@ -825,7 +830,9 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (base < xdr->head[0].iov_len || addr != NULL) { unsigned int len = xdr->head[0].iov_len - base; remainder -= len; - err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); + if (remainder == 0) + msg.msg_flags &= ~MSG_MORE; + err = xs_send_kvec(sock, &msg, &xdr->head[0], base); if (remainder == 0 || err != len) goto out; *sent_p += err; @@ -846,7 +853,8 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (base >= xdr->tail[0].iov_len) return 0; - err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); + msg.msg_flags &= ~MSG_MORE; + err = xs_send_kvec(sock, &msg, &xdr->tail[0], base); out: if (err > 0) { *sent_p += err; -- cgit v1.2.3 From 0472e476604998c127f3c80d291113e77c5676ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2019 13:00:13 -0500 Subject: SUNRPC: Convert socket page send code to use iov_iter() Simplify the page send code using iov_iter and bvecs. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 1 + net/sunrpc/xprtsock.c | 49 +++++++++++++------------------------------------ 2 files changed, 14 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1587728f40d1..2af6be9d6574 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1723,6 +1723,7 @@ void xprt_release(struct rpc_task *task) xprt->ops->buf_free(task); xprt_inject_disconnect(xprt); xdr_free_bvec(&req->rq_rcv_buf); + xdr_free_bvec(&req->rq_snd_buf); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e1546dd6aacc..35d1e81b6e5e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -759,42 +759,18 @@ static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *ve return xs_sendmsg(sock, msg, seek); } -static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p) +static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) { - ssize_t (*do_sendpage)(struct socket *sock, struct page *page, - int offset, size_t size, int flags); - struct page **ppage; - unsigned int remainder; int err; - remainder = xdr->page_len - base; - base += xdr->page_base; - ppage = xdr->pages + (base >> PAGE_SHIFT); - base &= ~PAGE_MASK; - do_sendpage = sock->ops->sendpage; - if (!zerocopy) - do_sendpage = sock_no_sendpage; - for(;;) { - unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); - int flags = XS_SENDMSG_FLAGS; + err = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (err < 0) + return err; - remainder -= len; - if (more) - flags |= MSG_MORE; - if (remainder != 0) - flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE; - err = do_sendpage(sock, *ppage, base, len, flags); - if (remainder == 0 || err != len) - break; - *sent_p += err; - ppage++; - base = 0; - } - if (err > 0) { - *sent_p += err; - err = 0; - } - return err; + iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, + xdr_buf_pagecount(xdr), + xdr->page_len + xdr->page_base); + return xs_sendmsg(sock, msg, base + xdr->page_base); } /** @@ -817,7 +793,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, }; unsigned int remainder = xdr->len - base; int err = 0; - int sent = 0; if (unlikely(!sock)) return -ENOTSOCK; @@ -843,10 +818,12 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (base < xdr->page_len) { unsigned int len = xdr->page_len - base; remainder -= len; - err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent); - *sent_p += sent; - if (remainder == 0 || sent != len) + if (remainder == 0) + msg.msg_flags &= ~MSG_MORE; + err = xs_send_pagedata(sock, &msg, xdr, base); + if (remainder == 0 || err != len) goto out; + *sent_p += err; base = 0; } else base -= xdr->page_len; -- cgit v1.2.3 From c87dc4c73bb04cb3f86c7d60f9f576eb08514ad8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2019 13:13:40 -0500 Subject: SUNRPC: Further cleanups of xs_sendpages() Now that we send the pages using a struct msghdr, instead of using sendpage(), we no longer need to 'prime the socket' with an address for unconnected UDP messages. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 35d1e81b6e5e..9c6eb76b3f46 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -751,10 +751,6 @@ static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek) static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek) { - if (!vec) { - iov_iter_kvec(&msg->msg_iter, WRITE, NULL, 0, 0); - return sock_sendmsg(sock, msg); - } iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len); return xs_sendmsg(sock, msg, seek); } @@ -797,12 +793,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (unlikely(!sock)) return -ENOTSOCK; - if (base != 0) { - addr = NULL; - addrlen = 0; - } - - if (base < xdr->head[0].iov_len || addr != NULL) { + if (base < xdr->head[0].iov_len) { unsigned int len = xdr->head[0].iov_len - base; remainder -= len; if (remainder == 0) -- cgit v1.2.3 From 6f903b111e33c4f0c349402efbd5319f52362f8e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2019 13:21:38 -0500 Subject: SUNRPC: Remove the redundant 'zerocopy' argument to xs_sendpages() Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9c6eb76b3f46..8fa74c8a4fd8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -776,11 +776,10 @@ static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_ * @addrlen: UDP only -- length of destination address * @xdr: buffer containing this request * @base: starting position in the buffer - * @zerocopy: true if it is safe to use sendpage() * @sent_p: return the total number of bytes successfully queued for sending * */ -static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p) +static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, int *sent_p) { struct msghdr msg = { .msg_name = addr, @@ -935,7 +934,7 @@ static int xs_local_send_request(struct rpc_rqst *req) req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, NULL, 0, xdr, transport->xmit.offset, - true, &sent); + &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); @@ -1002,7 +1001,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, - xdr, 0, true, &sent); + xdr, 0, &sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len, status); @@ -1066,7 +1065,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - bool zerocopy = true; bool vm_wait = false; int status; int sent; @@ -1083,12 +1081,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req) xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); - /* Don't use zero copy if this is a resend. If the RPC call - * completes while the socket holds a reference to the pages, - * then we may end up resending corrupted data. - */ - if (req->rq_task->tk_flags & RPC_TASK_SENT) - zerocopy = false; if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) xs_tcp_set_socket_timeouts(xprt, transport->sock); @@ -1101,7 +1093,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, transport->xmit.offset, - zerocopy, &sent); + &sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - transport->xmit.offset, status); -- cgit v1.2.3 From a73881c96d73ee72b7dbbd38a6eeef66182a8ef7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Feb 2019 06:33:02 -0500 Subject: SUNRPC: Fix an Oops in udp_poll() udp_poll() checks the struct file for the O_NONBLOCK flag, so we must not call it with a NULL file pointer. Fixes: 0ffe86f48026 ("SUNRPC: Use poll() to fix up the socket requeue races") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 1 + net/sunrpc/xprtsock.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 458bfe0137f5..b81d0b3e0799 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -26,6 +26,7 @@ struct sock_xprt { */ struct socket * sock; struct sock * inet; + struct file * file; /* * State of TCP reply receive diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 53de72d2dded..e829036ed81f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -670,7 +670,8 @@ out_err: static __poll_t xs_poll_socket(struct sock_xprt *transport) { - return transport->sock->ops->poll(NULL, transport->sock, NULL); + return transport->sock->ops->poll(transport->file, transport->sock, + NULL); } static bool xs_poll_socket_readable(struct sock_xprt *transport) @@ -1253,6 +1254,7 @@ static void xs_reset_transport(struct sock_xprt *transport) struct socket *sock = transport->sock; struct sock *sk = transport->inet; struct rpc_xprt *xprt = &transport->xprt; + struct file *filp = transport->file; if (sk == NULL) return; @@ -1266,6 +1268,7 @@ static void xs_reset_transport(struct sock_xprt *transport) write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; + transport->file = NULL; sk->sk_user_data = NULL; @@ -1278,7 +1281,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - sock_release(sock); + fput(filp); xprt_disconnect_done(xprt); } @@ -1873,6 +1876,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, struct sock_xprt *transport, int family, int type, int protocol, bool reuseport) { + struct file *filp; struct socket *sock; int err; @@ -1893,6 +1897,11 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); + if (IS_ERR(filp)) + return ERR_CAST(filp); + transport->file = filp; + return sock; out: return ERR_PTR(err); @@ -1938,6 +1947,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, static int xs_local_setup_socket(struct sock_xprt *transport) { struct rpc_xprt *xprt = &transport->xprt; + struct file *filp; struct socket *sock; int status = -EIO; @@ -1950,6 +1960,13 @@ static int xs_local_setup_socket(struct sock_xprt *transport) } xs_reclassify_socket(AF_LOCAL, sock); + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); + if (IS_ERR(filp)) { + status = PTR_ERR(filp); + goto out; + } + transport->file = filp; + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); -- cgit v1.2.3 From a79f194aa4879e9baad118c3f8bb2ca24dbef765 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Feb 2019 15:37:36 -0500 Subject: NFSv4/flexfiles: Abort I/O early if the layout segment was invalidated If a layout segment gets invalidated while a pNFS I/O operation is queued for transmission, then we ideally want to abort immediately. This is particularly the case when there is a large number of I/O related RPCs queued in the RPC layer, and the layout segment gets invalidated due to an ENOSPC error, or an EACCES (because the client was fenced). We may end up forced to spam the MDS with a lot of otherwise unnecessary LAYOUTERRORs after that I/O fails. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 17 +++++++++++++++++ include/linux/sunrpc/sched.h | 1 + net/sunrpc/xprt.c | 7 +++++++ 3 files changed, 25 insertions(+) (limited to 'net') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 244a03c22b31..a8e9bdd978e7 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1071,6 +1071,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, break; case -NFS4ERR_RETRY_UNCACHED_REP: break; + case -EAGAIN: + return -NFS4ERR_RESET_TO_PNFS; /* Invalidate Layout errors */ case -NFS4ERR_PNFS_NO_LAYOUT: case -ESTALE: /* mapped NFS4ERR_STALE */ @@ -1131,6 +1133,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, case -EBADHANDLE: case -ELOOP: case -ENOSPC: + case -EAGAIN: break; case -EJUKEBOX: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); @@ -1369,6 +1372,16 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) ff_layout_read_prepare_common(task, hdr); } +static void +ff_layout_io_prepare_transmit(struct rpc_task *task, + void *data) +{ + struct nfs_pgio_header *hdr = data; + + if (!pnfs_is_valid_lseg(hdr->lseg)) + rpc_exit(task, -EAGAIN); +} + static void ff_layout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; @@ -1657,6 +1670,7 @@ static void ff_layout_commit_release(void *data) static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { .rpc_call_prepare = ff_layout_read_prepare_v3, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, .rpc_release = ff_layout_read_release, @@ -1664,6 +1678,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { .rpc_call_prepare = ff_layout_read_prepare_v4, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, .rpc_release = ff_layout_read_release, @@ -1671,6 +1686,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { .rpc_call_prepare = ff_layout_write_prepare_v3, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, .rpc_release = ff_layout_write_release, @@ -1678,6 +1694,7 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { .rpc_call_prepare = ff_layout_write_prepare_v4, + .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, .rpc_release = ff_layout_write_release, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 219aa3910a0c..52d41d0c1ae1 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -97,6 +97,7 @@ typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { void (*rpc_call_prepare)(struct rpc_task *, void *); + void (*rpc_call_prepare_transmit)(struct rpc_task *, void *); void (*rpc_call_done)(struct rpc_task *, void *); void (*rpc_count_stats)(struct rpc_task *, void *); void (*rpc_release)(void *); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1cf4e379be7b..e096c5a725df 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1330,6 +1330,13 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) status = -EBADMSG; goto out_dequeue; } + if (task->tk_ops->rpc_call_prepare_transmit) { + task->tk_ops->rpc_call_prepare_transmit(task, + task->tk_calldata); + status = task->tk_status; + if (status < 0) + goto out_dequeue; + } } /* -- cgit v1.2.3 From 12a3ad6184f86ba48f2269198c1a4520085f3002 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 2 Mar 2019 10:14:02 -0500 Subject: SUNRPC: Convert remaining GFP_NOIO, and GFP_NOWAIT sites in sunrpc Convert the remaining gfp_flags arguments in sunrpc to standard reclaiming allocations, now that we set memalloc_nofs_save() as appropriate. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/sched.c | 7 ++----- net/sunrpc/xprtsock.c | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index c67e2ad151ae..3fd56c0c90ae 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1208,7 +1208,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) struct gss_cred *new; /* Make a copy of the cred so that we can reference count it */ - new = kzalloc(sizeof(*gss_cred), GFP_NOIO); + new = kzalloc(sizeof(*gss_cred), GFP_NOFS); if (new) { struct auth_cred acred = { .cred = gss_cred->gc_base.cr_cred, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 2168d4d9c09f..f21557213a43 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -925,16 +925,13 @@ static void rpc_async_schedule(struct work_struct *work) * Most requests are 'small' (under 2KiB) and can be serviced from a * mempool, ensuring that NFS reads and writes can always proceed, * and that there is good locality of reference for these buffers. - * - * In order to avoid memory starvation triggering more writebacks of - * NFS requests, we avoid using GFP_KERNEL. */ int rpc_malloc(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; size_t size = rqst->rq_callsize + rqst->rq_rcvsize; struct rpc_buffer *buf; - gfp_t gfp = GFP_NOIO | __GFP_NOWARN; + gfp_t gfp = GFP_NOFS; if (RPC_IS_SWAPPER(task)) gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; @@ -1015,7 +1012,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e829036ed81f..42f45d33dc56 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -423,7 +423,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, want = xs_alloc_sparse_pages(buf, min_t(size_t, count - offset, buf->page_len), - GFP_NOWAIT); + GFP_KERNEL); if (seek < want) { ret = xs_read_bvec(sock, msg, flags, buf->bvec, xdr_buf_pagecount(buf), @@ -909,7 +909,7 @@ static int xs_nospace(struct rpc_rqst *req) static void xs_stream_prepare_request(struct rpc_rqst *req) { - req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO); + req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL); } /* -- cgit v1.2.3 From 0d1bf3407c4ae887a464d136aaa5e9ef609834f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 2 Mar 2019 13:12:22 -0500 Subject: SUNRPC: Allow dynamic allocation of back channel slots Now that the reads happen in a process context rather than a softirq, it is safe to allocate back channel slots using a reclaiming allocation. Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index b9313c15ee3a..c47d82622fd1 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -235,7 +235,8 @@ out: list_empty(&xprt->bc_pa_list) ? "true" : "false"); } -static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) +static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid, + struct rpc_rqst *new) { struct rpc_rqst *req = NULL; @@ -243,10 +244,9 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) if (atomic_read(&xprt->bc_free_slots) <= 0) goto not_found; if (list_empty(&xprt->bc_pa_list)) { - req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); - if (!req) + if (!new) goto not_found; - list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list); xprt->bc_alloc_count++; } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, @@ -256,8 +256,8 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) sizeof(req->rq_private_buf)); req->rq_xid = xid; req->rq_connect_cookie = xprt->connect_cookie; -not_found: dprintk("RPC: backchannel req=%p\n", req); +not_found: return req; } @@ -320,18 +320,27 @@ void xprt_free_bc_rqst(struct rpc_rqst *req) */ struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid) { - struct rpc_rqst *req; - - spin_lock(&xprt->bc_pa_lock); - list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) { - if (req->rq_connect_cookie != xprt->connect_cookie) - continue; - if (req->rq_xid == xid) - goto found; - } - req = xprt_alloc_bc_request(xprt, xid); + struct rpc_rqst *req, *new = NULL; + + do { + spin_lock(&xprt->bc_pa_lock); + list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) { + if (req->rq_connect_cookie != xprt->connect_cookie) + continue; + if (req->rq_xid == xid) + goto found; + } + req = xprt_get_bc_request(xprt, xid, new); found: - spin_unlock(&xprt->bc_pa_lock); + spin_unlock(&xprt->bc_pa_lock); + if (new) { + if (req != new) + xprt_free_bc_rqst(new); + break; + } else if (req) + break; + new = xprt_alloc_bc_req(xprt, GFP_KERNEL); + } while (new); return req; } -- cgit v1.2.3 From ed7dc973bd91da234d93aff6d033a5206a6c9885 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Mar 2019 14:19:31 -0500 Subject: SUNRPC: Prevent thundering herd when the socket is not connected If the socket is not connected, then we want to initiate a reconnect rather that trying to transmit requests. If there is a large number of requests queued and waiting for the lock in call_transmit(), then it can take a while for one of the to loop back and retake the lock in call_connect. Fixes: 89f90fe1ad8b ("SUNRPC: Allow calls to xprt_transmit() to drain...") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 241e8423fd0c..7ab4da342ab5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1807,7 +1807,12 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: - task->tk_action = call_bind; + task->tk_action = call_transmit; + /* Check that the connection is OK */ + if (!xprt_bound(task->tk_xprt)) + task->tk_action = call_bind; + else if (!xprt_connected(task->tk_xprt)) + task->tk_action = call_connect; } /* @@ -1999,13 +2004,19 @@ call_transmit(struct rpc_task *task) { dprint_status(task); - task->tk_status = 0; + task->tk_action = call_transmit_status; if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { if (!xprt_prepare_transmit(task)) return; - xprt_transmit(task); + task->tk_status = 0; + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { + if (!xprt_connected(task->tk_xprt)) { + task->tk_status = -ENOTCONN; + return; + } + xprt_transmit(task); + } } - task->tk_action = call_transmit_status; xprt_end_transmit(task); } @@ -2067,6 +2078,8 @@ call_transmit_status(struct rpc_task *task) case -EADDRINUSE: case -ENOTCONN: case -EPIPE: + task->tk_action = call_bind; + task->tk_status = 0; break; } } -- cgit v1.2.3 From 477687e1116ad16180caf8633dd830b296a5ce73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Mar 2019 07:30:48 -0500 Subject: SUNRPC: Fix up RPC back channel transmission Now that transmissions happen through a queue, we require the RPC tasks to handle error conditions that may have been set while they were sleeping. The back channel does not currently do this, but assumes that any error condition happens during its own call to xprt_transmit(). The solution is to ensure that the back channel splits out the error handling just like the forward channel does. Fixes: 89f90fe1ad8b ("SUNRPC: Allow calls to xprt_transmit() to drain...") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 61 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7ab4da342ab5..b9558e10c6c1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -66,9 +66,6 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); -#if defined(CONFIG_SUNRPC_BACKCHANNEL) -static void call_bc_transmit(struct rpc_task *task); -#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -1133,6 +1130,8 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, EXPORT_SYMBOL_GPL(rpc_call_async); #if defined(CONFIG_SUNRPC_BACKCHANNEL) +static void call_bc_encode(struct rpc_task *task); + /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it @@ -1154,7 +1153,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) task = rpc_new_task(&task_setup_data); xprt_init_bc_request(req, task); - task->tk_action = call_bc_transmit; + task->tk_action = call_bc_encode; atomic_inc(&task->tk_count); WARN_ON_ONCE(atomic_read(&task->tk_count) != 2); rpc_execute(task); @@ -2085,6 +2084,16 @@ call_transmit_status(struct rpc_task *task) } #if defined(CONFIG_SUNRPC_BACKCHANNEL) +static void call_bc_transmit(struct rpc_task *task); +static void call_bc_transmit_status(struct rpc_task *task); + +static void +call_bc_encode(struct rpc_task *task) +{ + xprt_request_enqueue_transmit(task); + task->tk_action = call_bc_transmit; +} + /* * 5b. Send the backchannel RPC reply. On error, drop the reply. In * addition, disconnect on connectivity errors. @@ -2092,26 +2101,23 @@ call_transmit_status(struct rpc_task *task) static void call_bc_transmit(struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; - - if (rpc_task_need_encode(task)) - xprt_request_enqueue_transmit(task); - if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - goto out_wakeup; - - if (!xprt_prepare_transmit(task)) - goto out_retry; - - if (task->tk_status < 0) { - printk(KERN_NOTICE "RPC: Could not send backchannel reply " - "error: %d\n", task->tk_status); - goto out_done; + task->tk_action = call_bc_transmit_status; + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { + if (!xprt_prepare_transmit(task)) + return; + task->tk_status = 0; + xprt_transmit(task); } + xprt_end_transmit(task); +} - xprt_transmit(task); +static void +call_bc_transmit_status(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; - xprt_end_transmit(task); dprint_status(task); + switch (task->tk_status) { case 0: /* Success */ @@ -2125,8 +2131,14 @@ call_bc_transmit(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOBUFS: + rpc_delay(task, HZ>>2); + /* fall through */ + case -EBADSLT: case -EAGAIN: - goto out_retry; + task->tk_status = 0; + task->tk_action = call_bc_transmit; + return; case -ETIMEDOUT: /* * Problem reaching the server. Disconnect and let the @@ -2145,18 +2157,11 @@ call_bc_transmit(struct rpc_task *task) * We were unable to reply and will have to drop the * request. The server should reconnect and retransmit. */ - WARN_ON_ONCE(task->tk_status == -EAGAIN); printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); break; } -out_wakeup: - rpc_wake_up_queued_task(&req->rq_xprt->pending, task); -out_done: task->tk_action = rpc_exit_task; - return; -out_retry: - task->tk_status = 0; } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -- cgit v1.2.3 From 7b3fef8e4157ed424bcde039a60a730aa0dfb0eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Mar 2019 14:10:32 -0500 Subject: SUNRPC: Respect RPC call timeouts when retrying transmission Fix a regression where soft and softconn requests are not timing out as expected. Fixes: 89f90fe1ad8b ("SUNRPC: Allow calls to xprt_transmit() to drain...") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b9558e10c6c1..311029b7c33a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,6 +79,7 @@ static int rpc_encode_header(struct rpc_task *task, static int rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr); static int rpc_ping(struct rpc_clnt *clnt); +static void rpc_check_timeout(struct rpc_task *task); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -1962,8 +1963,7 @@ call_connect_status(struct rpc_task *task) break; if (clnt->cl_autobind) { rpc_force_rebind(clnt); - task->tk_action = call_bind; - return; + goto out_retry; } /* fall through */ case -ECONNRESET: @@ -1983,16 +1983,19 @@ call_connect_status(struct rpc_task *task) /* fall through */ case -ENOTCONN: case -EAGAIN: - /* Check for timeouts before looping back to call_bind */ case -ETIMEDOUT: - task->tk_action = call_timeout; - return; + goto out_retry; case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; return; } rpc_exit(task, status); + return; +out_retry: + /* Check for timeouts before looping back to call_bind */ + task->tk_action = call_bind; + rpc_check_timeout(task); } /* @@ -2069,7 +2072,7 @@ call_transmit_status(struct rpc_task *task) trace_xprt_ping(task->tk_xprt, task->tk_status); rpc_exit(task, task->tk_status); - break; + return; } /* fall through */ case -ECONNRESET: @@ -2081,6 +2084,7 @@ call_transmit_status(struct rpc_task *task) task->tk_status = 0; break; } + rpc_check_timeout(task); } #if defined(CONFIG_SUNRPC_BACKCHANNEL) @@ -2217,7 +2221,7 @@ call_status(struct rpc_task *task) case -EPIPE: case -ENOTCONN: case -EAGAIN: - task->tk_action = call_encode; + task->tk_action = call_timeout; break; case -EIO: /* shutdown or soft timeout */ @@ -2231,20 +2235,13 @@ call_status(struct rpc_task *task) } } -/* - * 6a. Handle RPC timeout - * We do not release the request slot, so we keep using the - * same XID for all retransmits. - */ static void -call_timeout(struct rpc_task *task) +rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - if (xprt_adjust_timeout(task->tk_rqstp) == 0) { - dprintk("RPC: %5u call_timeout (minor)\n", task->tk_pid); - goto retry; - } + if (xprt_adjust_timeout(task->tk_rqstp) == 0) + return; dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; @@ -2280,10 +2277,19 @@ call_timeout(struct rpc_task *task) * event? RFC2203 requires the server to drop all such requests. */ rpcauth_invalcred(task); +} -retry: +/* + * 6a. Handle RPC timeout + * We do not release the request slot, so we keep using the + * same XID for all retransmits. + */ +static void +call_timeout(struct rpc_task *task) +{ task->tk_action = call_encode; task->tk_status = 0; + rpc_check_timeout(task); } /* -- cgit v1.2.3 From cea57789e4081870ac3498fbefabbbd0d0fd8434 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Mar 2019 16:06:47 -0500 Subject: SUNRPC: Clean up Replace remaining callers of call_timeout() with rpc_check_timeout(). Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 47 ++++++++++++++--------------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 311029b7c33a..a0e7322fc171 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -70,7 +70,6 @@ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); static void call_refreshresult(struct rpc_task *task); -static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); @@ -1908,7 +1907,8 @@ call_bind_status(struct rpc_task *task) retry_timeout: task->tk_status = 0; - task->tk_action = call_timeout; + task->tk_action = call_encode; + rpc_check_timeout(task); } /* @@ -2197,10 +2197,8 @@ call_status(struct rpc_task *task) case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: - if (RPC_IS_SOFTCONN(task)) { - rpc_exit(task, status); - break; - } + if (RPC_IS_SOFTCONN(task)) + goto out_exit; /* * Delay any retries for 3 seconds, then handle as if it * were a timeout. @@ -2208,7 +2206,6 @@ call_status(struct rpc_task *task) rpc_delay(task, 3*HZ); /* fall through */ case -ETIMEDOUT: - task->tk_action = call_timeout; break; case -ECONNREFUSED: case -ECONNRESET: @@ -2221,18 +2218,21 @@ call_status(struct rpc_task *task) case -EPIPE: case -ENOTCONN: case -EAGAIN: - task->tk_action = call_timeout; break; case -EIO: /* shutdown or soft timeout */ - rpc_exit(task, status); - break; + goto out_exit; default: if (clnt->cl_chatty) printk("%s: RPC call returned error %d\n", clnt->cl_program->name, -status); - rpc_exit(task, status); + goto out_exit; } + task->tk_action = call_encode; + rpc_check_timeout(task); + return; +out_exit: + rpc_exit(task, status); } static void @@ -2279,19 +2279,6 @@ rpc_check_timeout(struct rpc_task *task) rpcauth_invalcred(task); } -/* - * 6a. Handle RPC timeout - * We do not release the request slot, so we keep using the - * same XID for all retransmits. - */ -static void -call_timeout(struct rpc_task *task) -{ - task->tk_action = call_encode; - task->tk_status = 0; - rpc_check_timeout(task); -} - /* * 7. Decode the RPC reply */ @@ -2329,16 +2316,8 @@ call_decode(struct rpc_task *task) WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, sizeof(req->rq_rcv_buf)) != 0); - if (req->rq_rcv_buf.len < 12) { - if (!RPC_IS_SOFT(task)) { - task->tk_action = call_encode; - goto out_retry; - } - dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", - clnt->cl_program->name, task->tk_status); - task->tk_action = call_timeout; + if (req->rq_rcv_buf.len < 12) goto out_retry; - } xdr_init_decode(&xdr, &req->rq_rcv_buf, req->rq_rcv_buf.head[0].iov_base, req); @@ -2361,6 +2340,8 @@ out_retry: xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); } + task->tk_action = call_encode; + rpc_check_timeout(task); } } -- cgit v1.2.3 From 6b5f590016870a989a94d61c984e3bfc7a37bfe7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Mar 2019 16:20:11 -0500 Subject: SUNRPC: Remove redundant calls to RPC_IS_QUEUED() The RPC task wakeup calls all check for RPC_IS_QUEUED() before taking any locks. In addition, rpc_exit() already calls rpc_wake_up_queued_task(). Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 3 --- net/sunrpc/sched.c | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a0e7322fc171..9cf63e6339f4 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -833,9 +833,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); - if (RPC_IS_QUEUED(rovr)) - rpc_wake_up_queued_task(rovr->tk_waitqueue, - rovr); } } spin_unlock(&clnt->cl_lock); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f21557213a43..28956c70100a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -785,8 +785,7 @@ void rpc_exit(struct rpc_task *task, int status) { task->tk_status = status; task->tk_action = rpc_exit_task; - if (RPC_IS_QUEUED(task)) - rpc_wake_up_queued_task(task->tk_waitqueue, task); + rpc_wake_up_queued_task(task->tk_waitqueue, task); } EXPORT_SYMBOL_GPL(rpc_exit); -- cgit v1.2.3 From 03e51d32da995030a16697038232171807eeb0f2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Mar 2019 11:17:13 -0400 Subject: SUNRPC: Check whether the task was transmitted before rebind/reconnect Before initiating transport actions that require putting the task to sleep, such as rebinding or reconnecting, we should check whether or not the task was already transmitted. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9cf63e6339f4..67c955d8b21b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1811,6 +1811,24 @@ out: task->tk_action = call_connect; } +/* + * Helpers to check if the task was already transmitted, and + * to take action when that is the case. + */ +static bool +rpc_task_transmitted(struct rpc_task *task) +{ + return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); +} + +static void +rpc_task_handle_transmitted(struct rpc_task *task) +{ + xprt_end_transmit(task); + task->tk_action = call_transmit_status; + call_transmit_status(task); +} + /* * 4. Get the server port number if not yet set */ @@ -1819,6 +1837,11 @@ call_bind(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } + dprint_status(task); task->tk_action = call_connect; @@ -1837,6 +1860,11 @@ call_bind_status(struct rpc_task *task) { int status = -EIO; + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } + if (task->tk_status >= 0) { dprint_status(task); task->tk_status = 0; @@ -1916,6 +1944,11 @@ call_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } + dprintk("RPC: %5u call_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); @@ -1942,10 +1975,8 @@ call_connect_status(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; int status = task->tk_status; - /* Check if the task was already transmitted */ - if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { - xprt_end_transmit(task); - task->tk_action = call_transmit_status; + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); return; } @@ -2001,6 +2032,11 @@ out_retry: static void call_transmit(struct rpc_task *task) { + if (rpc_task_transmitted(task)) { + rpc_task_handle_transmitted(task); + return; + } + dprint_status(task); task->tk_action = call_transmit_status; -- cgit v1.2.3 From 009a82f6437490c262584d65a14094a818bcb747 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Mar 2019 12:07:17 -0500 Subject: SUNRPC: Micro-optimise when the task is known not to be sleeping In cases where we know the task is not sleeping, try to optimise away the indirect call to task->tk_action() by replacing it with a direct call. Only change tail calls, to allow gcc to perform tail call elimination. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 8 ++++ net/sunrpc/clnt.c | 99 +++++++++++++++++++++++++++++--------------- 2 files changed, 73 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 52d41d0c1ae1..ec861cd0cfe8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -304,4 +304,12 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) } #endif /* CONFIG_SUNRPC_SWAP */ +static inline bool +rpc_task_need_resched(const struct rpc_task *task) +{ + if (RPC_IS_QUEUED(task) || task->tk_callback) + return true; + return false; +} + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 67c955d8b21b..498dd6ad5bc5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1540,6 +1540,7 @@ call_start(struct rpc_task *task) clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); + call_reserve(task); } /* @@ -1553,6 +1554,9 @@ call_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); + if (rpc_task_need_resched(task)) + return; + call_reserveresult(task); } static void call_retry_reserve(struct rpc_task *task); @@ -1575,6 +1579,7 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; + call_refresh(task); return; } @@ -1600,6 +1605,7 @@ call_reserveresult(struct rpc_task *task) /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; + call_retry_reserve(task); return; case -EIO: /* probably a shutdown */ break; @@ -1622,6 +1628,9 @@ call_retry_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); + if (rpc_task_need_resched(task)) + return; + call_reserveresult(task); } /* @@ -1636,6 +1645,9 @@ call_refresh(struct rpc_task *task) task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; rpcauth_refreshcred(task); + if (rpc_task_need_resched(task)) + return; + call_refreshresult(task); } /* @@ -1654,6 +1666,7 @@ call_refreshresult(struct rpc_task *task) case 0: if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; + call_allocate(task); return; } /* Use rate-limiting and a max number of retries if refresh @@ -1672,6 +1685,7 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; dprintk("RPC: %5u %s: retry refresh creds\n", task->tk_pid, __func__); + call_refresh(task); return; } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", @@ -1697,8 +1711,10 @@ call_allocate(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_encode; - if (req->rq_buffer) + if (req->rq_buffer) { + call_encode(task); return; + } if (proc->p_proc != 0) { BUG_ON(proc->p_arglen == 0); @@ -1719,8 +1735,12 @@ call_allocate(struct rpc_task *task) status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); - if (status == 0) + if (status == 0) { + if (rpc_task_need_resched(task)) + return; + call_encode(task); return; + } if (status != -ENOMEM) { rpc_exit(task, status); return; @@ -1803,12 +1823,8 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: - task->tk_action = call_transmit; - /* Check that the connection is OK */ - if (!xprt_bound(task->tk_xprt)) - task->tk_action = call_bind; - else if (!xprt_connected(task->tk_xprt)) - task->tk_action = call_connect; + task->tk_action = call_bind; + call_bind(task); } /* @@ -1842,14 +1858,17 @@ call_bind(struct rpc_task *task) return; } + if (xprt_bound(xprt)) { + task->tk_action = call_connect; + call_connect(task); + return; + } + dprint_status(task); - task->tk_action = call_connect; - if (!xprt_bound(xprt)) { - task->tk_action = call_bind_status; - task->tk_timeout = xprt->bind_timeout; - xprt->ops->rpcbind(task); - } + task->tk_action = call_bind_status; + task->tk_timeout = xprt->bind_timeout; + xprt->ops->rpcbind(task); } /* @@ -1869,6 +1888,7 @@ call_bind_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; task->tk_action = call_connect; + call_connect(task); return; } @@ -1949,21 +1969,24 @@ call_connect(struct rpc_task *task) return; } + if (xprt_connected(xprt)) { + task->tk_action = call_transmit; + call_transmit(task); + return; + } + dprintk("RPC: %5u call_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - task->tk_action = call_transmit; - if (!xprt_connected(xprt)) { - task->tk_action = call_connect_status; - if (task->tk_status < 0) - return; - if (task->tk_flags & RPC_TASK_NOCONNECT) { - rpc_exit(task, -ENOTCONN); - return; - } - xprt_connect(task); + task->tk_action = call_connect_status; + if (task->tk_status < 0) + return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; } + xprt_connect(task); } /* @@ -2016,6 +2039,7 @@ call_connect_status(struct rpc_task *task) case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; + call_transmit(task); return; } rpc_exit(task, status); @@ -2040,19 +2064,20 @@ call_transmit(struct rpc_task *task) dprint_status(task); task->tk_action = call_transmit_status; + if (!xprt_prepare_transmit(task)) + return; + task->tk_status = 0; if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { - if (!xprt_prepare_transmit(task)) + if (!xprt_connected(task->tk_xprt)) { + task->tk_status = -ENOTCONN; return; - task->tk_status = 0; - if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { - if (!xprt_connected(task->tk_xprt)) { - task->tk_status = -ENOTCONN; - return; - } - xprt_transmit(task); } + xprt_transmit(task); } xprt_end_transmit(task); + if (rpc_task_need_resched(task)) + return; + call_transmit_status(task); } /* @@ -2067,8 +2092,12 @@ call_transmit_status(struct rpc_task *task) * Common case: success. Force the compiler to put this * test first. */ - if (task->tk_status == 0) { - xprt_request_wait_receive(task); + if (rpc_task_transmitted(task)) { + if (task->tk_status == 0) + xprt_request_wait_receive(task); + if (rpc_task_need_resched(task)) + return; + call_status(task); return; } @@ -2129,6 +2158,7 @@ call_bc_encode(struct rpc_task *task) { xprt_request_enqueue_transmit(task); task->tk_action = call_bc_transmit; + call_bc_transmit(task); } /* @@ -2219,6 +2249,7 @@ call_status(struct rpc_task *task) status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; + call_decode(task); return; } -- cgit v1.2.3 From 4d6c671ace569d4b0d3f8d92ab3aef18a5d166bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Mar 2019 12:22:39 -0400 Subject: SUNRPC: Take the transport send lock before binding+connecting Before trying to bind a port, ensure we grab the send lock to ensure that we don't change the port while another task is busy transmitting requests. The connect code already takes the send lock in xprt_connect(), but it is harmless to take it before that. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 498dd6ad5bc5..4216fe33204a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1867,6 +1867,9 @@ call_bind(struct rpc_task *task) dprint_status(task); task->tk_action = call_bind_status; + if (!xprt_prepare_transmit(task)) + return; + task->tk_timeout = xprt->bind_timeout; xprt->ops->rpcbind(task); } @@ -1911,6 +1914,8 @@ call_bind_status(struct rpc_task *task) task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; + case -EAGAIN: + goto retry_timeout; case -ETIMEDOUT: dprintk("RPC: %5u rpcbind request timed out\n", task->tk_pid); @@ -1952,7 +1957,7 @@ call_bind_status(struct rpc_task *task) retry_timeout: task->tk_status = 0; - task->tk_action = call_encode; + task->tk_action = call_bind; rpc_check_timeout(task); } @@ -1986,6 +1991,8 @@ call_connect(struct rpc_task *task) rpc_exit(task, -ENOTCONN); return; } + if (!xprt_prepare_transmit(task)) + return; xprt_connect(task); } -- cgit v1.2.3