summaryrefslogtreecommitdiff
path: root/fs/ceph/mds_client.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--fs/ceph/mds_client.c140
1 files changed, 90 insertions, 50 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 666a9f274832..ab69dcb70e8a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/fs.h>
@@ -7,7 +8,6 @@
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
-#include <linux/utsname.h>
#include <linux/ratelimit.h>
#include "super.h"
@@ -408,7 +408,7 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
{
struct ceph_mds_session *session;
- if (mds >= mdsc->max_sessions || mdsc->sessions[mds] == NULL)
+ if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
return NULL;
session = mdsc->sessions[mds];
dout("lookup_mds_session %p %d\n", session,
@@ -483,7 +483,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
dout("register_session realloc to %d\n", newmax);
sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
- if (sa == NULL)
+ if (!sa)
goto fail_realloc;
if (mdsc->sessions) {
memcpy(sa, mdsc->sessions,
@@ -731,8 +731,16 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode = NULL;
if (req->r_inode) {
- inode = req->r_inode;
- ihold(inode);
+ if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) {
+ inode = req->r_inode;
+ ihold(inode);
+ } else {
+ /* req->r_dentry is non-null for LSSNAP request */
+ rcu_read_lock();
+ inode = get_nonsnap_parent(req->r_dentry);
+ rcu_read_unlock();
+ dout("__choose_mds using snapdir's parent %p\n", inode);
+ }
} else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
struct dentry *parent;
@@ -877,8 +885,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
void *p;
const char* metadata[][2] = {
- {"hostname", utsname()->nodename},
- {"kernel_version", utsname()->release},
+ {"hostname", mdsc->nodename},
+ {"kernel_version", init_utsname()->release},
{"entity_id", opt->name ? : ""},
{"root", fsopt->server_path ? : "/"},
{NULL, NULL}
@@ -886,7 +894,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
/* Calculate serialized length of metadata */
metadata_bytes = 4; /* map length */
- for (i = 0; metadata[i][0] != NULL; ++i) {
+ for (i = 0; metadata[i][0]; ++i) {
metadata_bytes += 8 + strlen(metadata[i][0]) +
strlen(metadata[i][1]);
metadata_key_count++;
@@ -919,7 +927,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
ceph_encode_32(&p, metadata_key_count);
/* Two length-prefixed strings for each entry in the map */
- for (i = 0; metadata[i][0] != NULL; ++i) {
+ for (i = 0; metadata[i][0]; ++i) {
size_t const key_len = strlen(metadata[i][0]);
size_t const val_len = strlen(metadata[i][1]);
@@ -1031,22 +1039,23 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
* session caps
*/
-/* caller holds s_cap_lock, we drop it */
-static void cleanup_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
- __releases(session->s_cap_lock)
+static void detach_cap_releases(struct ceph_mds_session *session,
+ struct list_head *target)
{
- LIST_HEAD(tmp_list);
- list_splice_init(&session->s_cap_releases, &tmp_list);
+ lockdep_assert_held(&session->s_cap_lock);
+
+ list_splice_init(&session->s_cap_releases, target);
session->s_num_cap_releases = 0;
- spin_unlock(&session->s_cap_lock);
+ dout("dispose_cap_releases mds%d\n", session->s_mds);
+}
- dout("cleanup_cap_releases mds%d\n", session->s_mds);
- while (!list_empty(&tmp_list)) {
+static void dispose_cap_releases(struct ceph_mds_client *mdsc,
+ struct list_head *dispose)
+{
+ while (!list_empty(dispose)) {
struct ceph_cap *cap;
/* zero out the in-progress message */
- cap = list_first_entry(&tmp_list,
- struct ceph_cap, session_caps);
+ cap = list_first_entry(dispose, struct ceph_cap, session_caps);
list_del(&cap->session_caps);
ceph_put_cap(mdsc, cap);
}
@@ -1122,7 +1131,7 @@ static int iterate_session_caps(struct ceph_mds_session *session,
spin_lock(&session->s_cap_lock);
p = p->next;
- if (cap->ci == NULL) {
+ if (!cap->ci) {
dout("iterate_session_caps finishing cap %p removal\n",
cap);
BUG_ON(cap->session != session);
@@ -1207,6 +1216,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
spin_unlock(&mdsc->cap_dirty_lock);
+ if (atomic_read(&ci->i_filelock_ref) > 0) {
+ /* make further file lock syscall return -EIO */
+ ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+ pr_warn_ratelimited(" dropping file locks for %p %lld\n",
+ inode, ceph_ino(inode));
+ }
+
if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
@@ -1236,6 +1252,8 @@ static void remove_session_caps(struct ceph_mds_session *session)
{
struct ceph_fs_client *fsc = session->s_mdsc->fsc;
struct super_block *sb = fsc->sb;
+ LIST_HEAD(dispose);
+
dout("remove_session_caps on %p\n", session);
iterate_session_caps(session, remove_session_caps_cb, fsc);
@@ -1270,10 +1288,12 @@ static void remove_session_caps(struct ceph_mds_session *session)
}
// drop cap expires and unlock s_cap_lock
- cleanup_cap_releases(session->s_mdsc, session);
+ detach_cap_releases(session, &dispose);
BUG_ON(session->s_nr_caps > 0);
BUG_ON(!list_empty(&session->s_cap_flushing));
+ spin_unlock(&session->s_cap_lock);
+ dispose_cap_releases(session->s_mdsc, &dispose);
}
/*
@@ -1454,6 +1474,11 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
goto out;
if ((used | wanted) & CEPH_CAP_ANY_WR)
goto out;
+ /* Note: it's possible that i_filelock_ref becomes non-zero
+ * after dropping auth caps. It doesn't hurt because reply
+ * of lock mds request will re-add auth caps. */
+ if (atomic_read(&ci->i_filelock_ref) > 0)
+ goto out;
}
/* The inode has cached pages, but it's no longer used.
* we can safely drop it */
@@ -1748,7 +1773,7 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
int len, pos;
unsigned seq;
- if (dentry == NULL)
+ if (!dentry)
return ERR_PTR(-EINVAL);
retry:
@@ -1771,7 +1796,7 @@ retry:
len--; /* no leading '/' */
path = kmalloc(len+1, GFP_NOFS);
- if (path == NULL)
+ if (!path)
return ERR_PTR(-ENOMEM);
pos = len;
path[pos] = 0; /* trailing null */
@@ -2819,7 +2844,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_mds_cap_reconnect v2;
struct ceph_mds_cap_reconnect_v1 v1;
} rec;
- struct ceph_inode_info *ci;
+ struct ceph_inode_info *ci = cap->ci;
struct ceph_reconnect_state *recon_state = arg;
struct ceph_pagelist *pagelist = recon_state->pagelist;
char *path;
@@ -2828,8 +2853,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
u64 snap_follows;
struct dentry *dentry;
- ci = cap->ci;
-
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
inode, ceph_vinop(inode), cap, cap->cap_id,
ceph_cap_string(cap->issued));
@@ -2862,7 +2885,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v2.issued = cpu_to_le32(cap->issued);
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
rec.v2.pathbase = cpu_to_le64(pathbase);
- rec.v2.flock_len = 0;
+ rec.v2.flock_len = (__force __le32)
+ ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
@@ -2875,7 +2899,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
if (list_empty(&ci->i_cap_snaps)) {
- snap_follows = 0;
+ snap_follows = ci->i_head_snapc ? ci->i_head_snapc->seq : 0;
} else {
struct ceph_cap_snap *capsnap =
list_first_entry(&ci->i_cap_snaps,
@@ -2886,26 +2910,37 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
if (recon_state->msg_version >= 2) {
int num_fcntl_locks, num_flock_locks;
- struct ceph_filelock *flocks;
+ struct ceph_filelock *flocks = NULL;
size_t struct_len, total_len = 0;
u8 struct_v = 0;
encode_again:
- ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
- flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
- sizeof(struct ceph_filelock), GFP_NOFS);
- if (!flocks) {
- err = -ENOMEM;
- goto out_free;
+ if (rec.v2.flock_len) {
+ ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+ } else {
+ num_fcntl_locks = 0;
+ num_flock_locks = 0;
}
- err = ceph_encode_locks_to_buffer(inode, flocks,
- num_fcntl_locks,
- num_flock_locks);
- if (err) {
+ if (num_fcntl_locks + num_flock_locks > 0) {
+ flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
+ sizeof(struct ceph_filelock), GFP_NOFS);
+ if (!flocks) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ err = ceph_encode_locks_to_buffer(inode, flocks,
+ num_fcntl_locks,
+ num_flock_locks);
+ if (err) {
+ kfree(flocks);
+ flocks = NULL;
+ if (err == -ENOSPC)
+ goto encode_again;
+ goto out_free;
+ }
+ } else {
kfree(flocks);
- if (err == -ENOSPC)
- goto encode_again;
- goto out_free;
+ flocks = NULL;
}
if (recon_state->msg_version >= 3) {
@@ -2985,6 +3020,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
int s_nr_caps;
struct ceph_pagelist *pagelist;
struct ceph_reconnect_state recon_state;
+ LIST_HEAD(dispose);
pr_info("mds%d reconnect start\n", mds);
@@ -3018,7 +3054,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
*/
session->s_cap_reconnect = 1;
/* drop old cap expires; we're about to reestablish that state */
- cleanup_cap_releases(mdsc, session);
+ detach_cap_releases(session, &dispose);
+ spin_unlock(&session->s_cap_lock);
+ dispose_cap_releases(mdsc, &dispose);
/* trim unused caps to reduce MDS's cache rejoin time */
if (mdsc->fsc->sb->s_root)
@@ -3133,7 +3171,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
newmap->m_epoch, oldmap->m_epoch);
for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
- if (mdsc->sessions[i] == NULL)
+ if (!mdsc->sessions[i])
continue;
s = mdsc->sessions[i];
oldstate = ceph_mdsmap_get_state(oldmap, i);
@@ -3280,7 +3318,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
mutex_lock(&session->s_mutex);
session->s_seq++;
- if (inode == NULL) {
+ if (!inode) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
}
@@ -3438,7 +3476,7 @@ static void delayed_work(struct work_struct *work)
for (i = 0; i < mdsc->max_sessions; i++) {
struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
- if (s == NULL)
+ if (!s)
continue;
if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
dout("resending session close request for mds%d\n",
@@ -3490,7 +3528,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
fsc->mdsc = mdsc;
mutex_init(&mdsc->mutex);
mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
- if (mdsc->mdsmap == NULL) {
+ if (!mdsc->mdsmap) {
kfree(mdsc);
return -ENOMEM;
}
@@ -3532,6 +3570,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_rwsem(&mdsc->pool_perm_rwsem);
mdsc->pool_perm_tree = RB_ROOT;
+ strncpy(mdsc->nodename, utsname()->nodename,
+ sizeof(mdsc->nodename) - 1);
return 0;
}
@@ -3847,14 +3887,14 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
goto err_out;
}
return;
+
bad:
pr_err("error decoding fsmap\n");
err_out:
mutex_lock(&mdsc->mutex);
- mdsc->mdsmap_err = -ENOENT;
+ mdsc->mdsmap_err = err;
__wake_requests(mdsc, &mdsc->waiting_for_map);
mutex_unlock(&mdsc->mutex);
- return;
}
/*