From d67c72e6cce99eab5ab9d62c599e33e5141ff8b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 1 Apr 2020 18:27:25 -0400 Subject: ceph: request expedited service on session's last cap flush When flushing a lot of caps to the MDS's at once (e.g. for syncfs), we can end up waiting a substantial amount of time for MDS replies, due to the fact that it may delay some of them so that it can batch them up together in a single journal transaction. This can lead to stalls when calling sync or syncfs. What we'd really like to do is request expedited service on the _last_ cap we're flushing back to the server. If the CHECK_CAPS_FLUSH flag is set on the request and the current inode was the last one on the session->s_cap_dirty list, then mark the request with CEPH_CLIENT_CAPS_SYNC. Note that this heuristic is not perfect. New inodes can race onto the list after we've started flushing, but it does seem to fix some common use cases. URL: https://tracker.ceph.com/issues/44744 Reported-by: Jan Fajerski Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d5ad2434bb07..2558fd14126a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1997,6 +1997,7 @@ retry_locked: } for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { + int mflags = 0; struct cap_msg_args arg; cap = rb_entry(p, struct ceph_cap, ci_node); @@ -2128,6 +2129,9 @@ ack: flushing = ci->i_dirty_caps; flush_tid = __mark_caps_flushing(inode, session, false, &oldest_flush_tid); + if (flags & CHECK_CAPS_FLUSH && + list_empty(&session->s_cap_dirty)) + mflags |= CEPH_CLIENT_CAPS_SYNC; } else { flushing = 0; flush_tid = 0; @@ -2138,8 +2142,8 @@ ack: mds = cap->mds; /* remember mds, so we don't repeat */ - __prep_cap(&arg, cap, CEPH_CAP_OP_UPDATE, 0, cap_used, want, - retain, flushing, flush_tid, oldest_flush_tid); + __prep_cap(&arg, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used, + want, retain, flushing, flush_tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); __send_cap(mdsc, &arg, ci); -- cgit v1.2.3