diff options
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/rpc_pipe.c | 1 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 35 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/backchannel.c | 4 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c | 12 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 67 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 4 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 15 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 17 |
9 files changed, 63 insertions, 98 deletions
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 8241f5a4a01c..09c000d490a1 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -478,6 +478,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); + break; default: break; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5a809c64dc7b..2e2f007dfc9f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1078,9 +1078,8 @@ static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, * In addition, the logic assumes that * .bv_len is never larger * than PAGE_SIZE. */ -static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, - struct xdr_buf *xdr, rpc_fraghdr marker, - unsigned int *sentp) +static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, + rpc_fraghdr marker, unsigned int *sentp) { const struct kvec *head = xdr->head; const struct kvec *tail = xdr->tail; @@ -1088,21 +1087,22 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, .iov_base = &marker, .iov_len = sizeof(marker), }; - int flags, ret; + struct msghdr msg = { + .msg_flags = 0, + }; + int ret; *sentp = 0; xdr_alloc_bvec(xdr, GFP_KERNEL); - msg->msg_flags = MSG_MORE; - ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); + ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) return ret; *sentp += ret; if (ret != rm.iov_len) return -EAGAIN; - flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0; - ret = svc_tcp_send_kvec(sock, head, flags); + ret = svc_tcp_send_kvec(sock, head, 0); if (ret < 0) return ret; *sentp += ret; @@ -1116,15 +1116,11 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT); offset = offset_in_page(xdr->page_base); remaining = xdr->page_len; - flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; while (remaining > 0) { - if (remaining <= PAGE_SIZE && tail->iov_len == 0) - flags = 0; - len = min(remaining, bvec->bv_len - offset); ret = kernel_sendpage(sock, bvec->bv_page, bvec->bv_offset + offset, - len, flags); + len, 0); if (ret < 0) return ret; *sentp += ret; @@ -1163,26 +1159,28 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) struct xdr_buf *xdr = &rqstp->rq_res; rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len); - struct msghdr msg = { - .msg_flags = 0, - }; unsigned int sent; int err; svc_tcp_release_rqst(rqstp); + atomic_inc(&svsk->sk_sendqlen); mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent); + tcp_sock_set_cork(svsk->sk_sk, true); + err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent); xdr_free_bvec(xdr); trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; + if (atomic_dec_and_test(&svsk->sk_sendqlen)) + tcp_sock_set_cork(svsk->sk_sk, false); mutex_unlock(&xprt->xpt_mutex); return sent; out_notconn: + atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -ENOTCONN; out_close: @@ -1192,6 +1190,7 @@ out_close: (err < 0) ? err : sent, xdr->len); set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } @@ -1261,7 +1260,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + tcp_sock_set_nodelay(sk); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); switch (sk->sk_state) { diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 946edf2db646..a249837d6a55 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2015-2020, Oracle and/or its affiliates. * - * Support for backward direction RPCs on RPC/RDMA. + * Support for reverse-direction RPCs on RPC/RDMA. */ #include <linux/sunrpc/xprt.h> @@ -208,7 +208,7 @@ create_req: } /** - * rpcrdma_bc_receive_call - Handle a backward direction call + * rpcrdma_bc_receive_call - Handle a reverse-direction Call * @r_xprt: transport receiving the call * @rep: receive buffer containing the call * diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index baca49fe83af..766a1048a48a 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -306,20 +306,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, if (nsegs > ep->re_max_fr_depth) nsegs = ep->re_max_fr_depth; for (i = 0; i < nsegs;) { - if (seg->mr_page) - sg_set_page(&mr->mr_sg[i], - seg->mr_page, - seg->mr_len, - offset_in_page(seg->mr_offset)); - else - sg_set_buf(&mr->mr_sg[i], seg->mr_offset, - seg->mr_len); + sg_set_page(&mr->mr_sg[i], seg->mr_page, + seg->mr_len, seg->mr_offset); ++seg; ++i; if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS) continue; - if ((i < nsegs && offset_in_page(seg->mr_offset)) || + if ((i < nsegs && seg->mr_offset) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8f5d0cb68360..292f066d006e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -204,9 +204,7 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf) return 0; } -/* Split @vec on page boundaries into SGEs. FMR registers pages, not - * a byte range. Other modes coalesce these SGEs into a single MR - * when they can. +/* Convert @vec to a single SGL element. * * Returns pointer to next available SGE, and bumps the total number * of SGEs consumed. @@ -215,22 +213,11 @@ static struct rpcrdma_mr_seg * rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, unsigned int *n) { - u32 remaining, page_offset; - char *base; - - base = vec->iov_base; - page_offset = offset_in_page(base); - remaining = vec->iov_len; - while (remaining) { - seg->mr_page = NULL; - seg->mr_offset = base; - seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining); - remaining -= seg->mr_len; - base += seg->mr_len; - ++seg; - ++(*n); - page_offset = 0; - } + seg->mr_page = virt_to_page(vec->iov_base); + seg->mr_offset = offset_in_page(vec->iov_base); + seg->mr_len = vec->iov_len; + ++seg; + ++(*n); return seg; } @@ -259,7 +246,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, page_base = offset_in_page(xdrbuf->page_base); while (len) { seg->mr_page = *ppages; - seg->mr_offset = (char *)page_base; + seg->mr_offset = page_base; seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len); len -= seg->mr_len; ++ppages; @@ -268,10 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, page_base = 0; } - /* When encoding a Read chunk, the tail iovec contains an - * XDR pad and may be omitted. - */ - if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup) + if (type == rpcrdma_readch) goto out; /* When encoding a Write chunk, some servers need to see an @@ -283,7 +267,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, goto out; if (xdrbuf->tail[0].iov_len) - seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n); + rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n); out: if (unlikely(n > RPCRDMA_MAX_SEGS)) @@ -644,9 +628,8 @@ out_mapping_err: return false; } -/* The tail iovec may include an XDR pad for the page list, - * as well as additional content, and may not reside in the - * same page as the head iovec. +/* The tail iovec might not reside in the same page as the + * head iovec. */ static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req, struct xdr_buf *xdr, @@ -764,27 +747,19 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct xdr_buf *xdr) { + struct kvec *tail = &xdr->tail[0]; + if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len)) return false; - /* If there is a Read chunk, the page list is being handled + /* If there is a Read chunk, the page list is handled * via explicit RDMA, and thus is skipped here. */ - /* Do not include the tail if it is only an XDR pad */ - if (xdr->tail[0].iov_len > 3) { - unsigned int page_base, len; - - /* If the content in the page list is an odd length, - * xdr_write_pages() adds a pad at the beginning of - * the tail iovec. Force the tail's non-pad content to - * land at the next XDR position in the Send message. - */ - page_base = offset_in_page(xdr->tail[0].iov_base); - len = xdr->tail[0].iov_len; - page_base += len & 3; - len -= len & 3; - if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len)) + if (tail->iov_len) { + if (!rpcrdma_prepare_tail_iov(req, xdr, + offset_in_page(tail->iov_base), + tail->iov_len)) return false; kref_get(&req->rl_kref); } @@ -1164,14 +1139,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) */ p = xdr_inline_decode(xdr, 3 * sizeof(*p)); if (unlikely(!p)) - goto out_short; + return true; rpcrdma_bc_receive_call(r_xprt, rep); return true; - -out_short: - pr_warn("RPC/RDMA short backward direction call\n"); - return true; } #else /* CONFIG_SUNRPC_BACKCHANNEL */ { diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 63f8be974df2..4a1edbb4028e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2015-2018 Oracle. All rights reserved. * - * Support for backward direction RPCs on RPC/RDMA (server-side). + * Support for reverse-direction RPCs on RPC/RDMA (server-side). */ #include <linux/sunrpc/svc_rdma.h> @@ -59,7 +59,7 @@ out_unlock: spin_unlock(&xprt->queue_lock); } -/* Send a backwards direction RPC call. +/* Send a reverse-direction RPC Call. * * Caller holds the connection's mutex and has already marshaled * the RPC/RDMA request. diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index afba4e9d5425..c895f80df659 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -475,9 +475,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!svc_rdma_post_recvs(newxprt)) goto errout; - /* Swap out the handler */ - newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; - /* Construct RDMA-CM private message */ pmsg.cp_magic = rpcrdma_cmp_magic; pmsg.cp_version = RPCRDMA_CMP_VERSION; @@ -498,7 +495,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) } conn_param.private_data = &pmsg; conn_param.private_data_len = sizeof(pmsg); + rdma_lock_handler(newxprt->sc_cm_id); + newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; ret = rdma_accept(newxprt->sc_cm_id, &conn_param); + rdma_unlock_handler(newxprt->sc_cm_id); if (ret) { trace_svcrdma_accept_err(newxprt, ret); goto errout; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 94b28657aeeb..fe3be985e239 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -98,9 +98,9 @@ struct rpcrdma_ep { atomic_t re_completion_ids; }; -/* Pre-allocate extra Work Requests for handling backward receives - * and sends. This is a fixed value because the Work Queues are - * allocated when the forward channel is set up, long before the +/* Pre-allocate extra Work Requests for handling reverse-direction + * Receives and Sends. This is a fixed value because the Work Queues + * are allocated when the forward channel is set up, long before the * backchannel is provisioned. This value is two times * NFS4_DEF_CB_SLOT_TABLE_SIZE. */ @@ -283,10 +283,11 @@ enum { RPCRDMA_MAX_IOV_SEGS, }; -struct rpcrdma_mr_seg { /* chunk descriptors */ - u32 mr_len; /* length of chunk or segment */ - struct page *mr_page; /* owning page, if any */ - char *mr_offset; /* kva if no page, else offset */ +/* Arguments for DMA mapping and registration */ +struct rpcrdma_mr_seg { + u32 mr_len; /* length of segment */ + struct page *mr_page; /* underlying struct page */ + u64 mr_offset; /* IN: page offset, OUT: iova */ }; /* The Send SGE array is provisioned to send a maximum size diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c56a66cdf4ac..e35760f238a4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -829,7 +829,7 @@ xs_stream_record_marker(struct xdr_buf *xdr) * EAGAIN: The socket was blocked, please call again later to * complete the request * ENOTCONN: Caller needs to invoke connect logic then call again - * other: Some other error occured, the request was not sent + * other: Some other error occurred, the request was not sent */ static int xs_local_send_request(struct rpc_rqst *req) { @@ -1665,7 +1665,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) * This ensures that we can continue to establish TCP * connections even when all local ephemeral ports are already * a part of some TCP connection. This makes no difference - * for UDP sockets, but also doens't harm them. + * for UDP sockets, but also doesn't harm them. * * If we're asking for any reserved port (i.e. port == 0 && * transport->xprt.resvport == 1) xs_get_srcport above will @@ -1875,6 +1875,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport) xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; xprt_set_connected(xprt); + break; case -ENOBUFS: break; case -ENOENT: @@ -2276,10 +2277,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: - /* - * xs_tcp_force_close() wakes tasks with -EIO. - * We need to wake them first to ensure the - * correct error code. + /* xs_tcp_force_close() wakes tasks with a fixed error code. + * We need to wake them first to ensure the correct error code. */ xprt_wake_pending_tasks(xprt, status); xs_tcp_force_close(xprt); @@ -2380,7 +2379,7 @@ static void xs_error_handle(struct work_struct *work) } /** - * xs_local_print_stats - display AF_LOCAL socket-specifc stats + * xs_local_print_stats - display AF_LOCAL socket-specific stats * @xprt: rpc_xprt struct containing statistics * @seq: output file * @@ -2409,7 +2408,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) } /** - * xs_udp_print_stats - display UDP socket-specifc stats + * xs_udp_print_stats - display UDP socket-specific stats * @xprt: rpc_xprt struct containing statistics * @seq: output file * @@ -2433,7 +2432,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) } /** - * xs_tcp_print_stats - display TCP socket-specifc stats + * xs_tcp_print_stats - display TCP socket-specific stats * @xprt: rpc_xprt struct containing statistics * @seq: output file * |