diff options
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r-- | fs/ceph/mds_client.c | 140 |
1 files changed, 90 insertions, 50 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 666a9f274832..ab69dcb70e8a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/fs.h> @@ -7,7 +8,6 @@ #include <linux/sched.h> #include <linux/debugfs.h> #include <linux/seq_file.h> -#include <linux/utsname.h> #include <linux/ratelimit.h> #include "super.h" @@ -408,7 +408,7 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *session; - if (mds >= mdsc->max_sessions || mdsc->sessions[mds] == NULL) + if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) return NULL; session = mdsc->sessions[mds]; dout("lookup_mds_session %p %d\n", session, @@ -483,7 +483,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, dout("register_session realloc to %d\n", newmax); sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); - if (sa == NULL) + if (!sa) goto fail_realloc; if (mdsc->sessions) { memcpy(sa, mdsc->sessions, @@ -731,8 +731,16 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode = NULL; if (req->r_inode) { - inode = req->r_inode; - ihold(inode); + if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) { + inode = req->r_inode; + ihold(inode); + } else { + /* req->r_dentry is non-null for LSSNAP request */ + rcu_read_lock(); + inode = get_nonsnap_parent(req->r_dentry); + rcu_read_unlock(); + dout("__choose_mds using snapdir's parent %p\n", inode); + } } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ struct dentry *parent; @@ -877,8 +885,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 void *p; const char* metadata[][2] = { - {"hostname", utsname()->nodename}, - {"kernel_version", utsname()->release}, + {"hostname", mdsc->nodename}, + {"kernel_version", init_utsname()->release}, {"entity_id", opt->name ? : ""}, {"root", fsopt->server_path ? : "/"}, {NULL, NULL} @@ -886,7 +894,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 /* Calculate serialized length of metadata */ metadata_bytes = 4; /* map length */ - for (i = 0; metadata[i][0] != NULL; ++i) { + for (i = 0; metadata[i][0]; ++i) { metadata_bytes += 8 + strlen(metadata[i][0]) + strlen(metadata[i][1]); metadata_key_count++; @@ -919,7 +927,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 ceph_encode_32(&p, metadata_key_count); /* Two length-prefixed strings for each entry in the map */ - for (i = 0; metadata[i][0] != NULL; ++i) { + for (i = 0; metadata[i][0]; ++i) { size_t const key_len = strlen(metadata[i][0]); size_t const val_len = strlen(metadata[i][1]); @@ -1031,22 +1039,23 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, * session caps */ -/* caller holds s_cap_lock, we drop it */ -static void cleanup_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) - __releases(session->s_cap_lock) +static void detach_cap_releases(struct ceph_mds_session *session, + struct list_head *target) { - LIST_HEAD(tmp_list); - list_splice_init(&session->s_cap_releases, &tmp_list); + lockdep_assert_held(&session->s_cap_lock); + + list_splice_init(&session->s_cap_releases, target); session->s_num_cap_releases = 0; - spin_unlock(&session->s_cap_lock); + dout("dispose_cap_releases mds%d\n", session->s_mds); +} - dout("cleanup_cap_releases mds%d\n", session->s_mds); - while (!list_empty(&tmp_list)) { +static void dispose_cap_releases(struct ceph_mds_client *mdsc, + struct list_head *dispose) +{ + while (!list_empty(dispose)) { struct ceph_cap *cap; /* zero out the in-progress message */ - cap = list_first_entry(&tmp_list, - struct ceph_cap, session_caps); + cap = list_first_entry(dispose, struct ceph_cap, session_caps); list_del(&cap->session_caps); ceph_put_cap(mdsc, cap); } @@ -1122,7 +1131,7 @@ static int iterate_session_caps(struct ceph_mds_session *session, spin_lock(&session->s_cap_lock); p = p->next; - if (cap->ci == NULL) { + if (!cap->ci) { dout("iterate_session_caps finishing cap %p removal\n", cap); BUG_ON(cap->session != session); @@ -1207,6 +1216,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, } spin_unlock(&mdsc->cap_dirty_lock); + if (atomic_read(&ci->i_filelock_ref) > 0) { + /* make further file lock syscall return -EIO */ + ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; + pr_warn_ratelimited(" dropping file locks for %p %lld\n", + inode, ceph_ino(inode)); + } + if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; @@ -1236,6 +1252,8 @@ static void remove_session_caps(struct ceph_mds_session *session) { struct ceph_fs_client *fsc = session->s_mdsc->fsc; struct super_block *sb = fsc->sb; + LIST_HEAD(dispose); + dout("remove_session_caps on %p\n", session); iterate_session_caps(session, remove_session_caps_cb, fsc); @@ -1270,10 +1288,12 @@ static void remove_session_caps(struct ceph_mds_session *session) } // drop cap expires and unlock s_cap_lock - cleanup_cap_releases(session->s_mdsc, session); + detach_cap_releases(session, &dispose); BUG_ON(session->s_nr_caps > 0); BUG_ON(!list_empty(&session->s_cap_flushing)); + spin_unlock(&session->s_cap_lock); + dispose_cap_releases(session->s_mdsc, &dispose); } /* @@ -1454,6 +1474,11 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) goto out; if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; + /* Note: it's possible that i_filelock_ref becomes non-zero + * after dropping auth caps. It doesn't hurt because reply + * of lock mds request will re-add auth caps. */ + if (atomic_read(&ci->i_filelock_ref) > 0) + goto out; } /* The inode has cached pages, but it's no longer used. * we can safely drop it */ @@ -1748,7 +1773,7 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, int len, pos; unsigned seq; - if (dentry == NULL) + if (!dentry) return ERR_PTR(-EINVAL); retry: @@ -1771,7 +1796,7 @@ retry: len--; /* no leading '/' */ path = kmalloc(len+1, GFP_NOFS); - if (path == NULL) + if (!path) return ERR_PTR(-ENOMEM); pos = len; path[pos] = 0; /* trailing null */ @@ -2819,7 +2844,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; } rec; - struct ceph_inode_info *ci; + struct ceph_inode_info *ci = cap->ci; struct ceph_reconnect_state *recon_state = arg; struct ceph_pagelist *pagelist = recon_state->pagelist; char *path; @@ -2828,8 +2853,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, u64 snap_follows; struct dentry *dentry; - ci = cap->ci; - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", inode, ceph_vinop(inode), cap, cap->cap_id, ceph_cap_string(cap->issued)); @@ -2862,7 +2885,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v2.pathbase = cpu_to_le64(pathbase); - rec.v2.flock_len = 0; + rec.v2.flock_len = (__force __le32) + ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); @@ -2875,7 +2899,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, } if (list_empty(&ci->i_cap_snaps)) { - snap_follows = 0; + snap_follows = ci->i_head_snapc ? ci->i_head_snapc->seq : 0; } else { struct ceph_cap_snap *capsnap = list_first_entry(&ci->i_cap_snaps, @@ -2886,26 +2910,37 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, if (recon_state->msg_version >= 2) { int num_fcntl_locks, num_flock_locks; - struct ceph_filelock *flocks; + struct ceph_filelock *flocks = NULL; size_t struct_len, total_len = 0; u8 struct_v = 0; encode_again: - ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); - flocks = kmalloc((num_fcntl_locks+num_flock_locks) * - sizeof(struct ceph_filelock), GFP_NOFS); - if (!flocks) { - err = -ENOMEM; - goto out_free; + if (rec.v2.flock_len) { + ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); + } else { + num_fcntl_locks = 0; + num_flock_locks = 0; } - err = ceph_encode_locks_to_buffer(inode, flocks, - num_fcntl_locks, - num_flock_locks); - if (err) { + if (num_fcntl_locks + num_flock_locks > 0) { + flocks = kmalloc((num_fcntl_locks + num_flock_locks) * + sizeof(struct ceph_filelock), GFP_NOFS); + if (!flocks) { + err = -ENOMEM; + goto out_free; + } + err = ceph_encode_locks_to_buffer(inode, flocks, + num_fcntl_locks, + num_flock_locks); + if (err) { + kfree(flocks); + flocks = NULL; + if (err == -ENOSPC) + goto encode_again; + goto out_free; + } + } else { kfree(flocks); - if (err == -ENOSPC) - goto encode_again; - goto out_free; + flocks = NULL; } if (recon_state->msg_version >= 3) { @@ -2985,6 +3020,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int s_nr_caps; struct ceph_pagelist *pagelist; struct ceph_reconnect_state recon_state; + LIST_HEAD(dispose); pr_info("mds%d reconnect start\n", mds); @@ -3018,7 +3054,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, */ session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ - cleanup_cap_releases(mdsc, session); + detach_cap_releases(session, &dispose); + spin_unlock(&session->s_cap_lock); + dispose_cap_releases(mdsc, &dispose); /* trim unused caps to reduce MDS's cache rejoin time */ if (mdsc->fsc->sb->s_root) @@ -3133,7 +3171,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, newmap->m_epoch, oldmap->m_epoch); for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) { - if (mdsc->sessions[i] == NULL) + if (!mdsc->sessions[i]) continue; s = mdsc->sessions[i]; oldstate = ceph_mdsmap_get_state(oldmap, i); @@ -3280,7 +3318,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, mutex_lock(&session->s_mutex); session->s_seq++; - if (inode == NULL) { + if (!inode) { dout("handle_lease no inode %llx\n", vino.ino); goto release; } @@ -3438,7 +3476,7 @@ static void delayed_work(struct work_struct *work) for (i = 0; i < mdsc->max_sessions; i++) { struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); - if (s == NULL) + if (!s) continue; if (s->s_state == CEPH_MDS_SESSION_CLOSING) { dout("resending session close request for mds%d\n", @@ -3490,7 +3528,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) fsc->mdsc = mdsc; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); - if (mdsc->mdsmap == NULL) { + if (!mdsc->mdsmap) { kfree(mdsc); return -ENOMEM; } @@ -3532,6 +3570,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) init_rwsem(&mdsc->pool_perm_rwsem); mdsc->pool_perm_tree = RB_ROOT; + strncpy(mdsc->nodename, utsname()->nodename, + sizeof(mdsc->nodename) - 1); return 0; } @@ -3847,14 +3887,14 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) goto err_out; } return; + bad: pr_err("error decoding fsmap\n"); err_out: mutex_lock(&mdsc->mutex); - mdsc->mdsmap_err = -ENOENT; + mdsc->mdsmap_err = err; __wake_requests(mdsc, &mdsc->waiting_for_map); mutex_unlock(&mdsc->mutex); - return; } /* |