From 5c5f0d2b5f92c47baf82b9b211e27edd7d195158 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 9 Jun 2023 13:04:14 +0800 Subject: libceph: add doutc and *_client debug macros support This will help print the fsid and client's global_id in debug logs, and also print the function names. [ idryomov: %lld -> %llu, leading space for doutc(), don't include __func__ in pr_*() variants ] Link: https://tracker.ceph.com/issues/61590 Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_debug.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index d5a5da838caf..11a92a946016 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -19,12 +19,25 @@ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ kbasename(__FILE__), __LINE__, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + kbasename(__FILE__), __LINE__, \ + &client->fsid, client->monc.auth->global_id, \ + ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ if (0) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) +# define doutc(client, fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "[%pU %llu] " fmt, \ + &client->fsid, \ + client->monc.auth->global_id, \ + ##__VA_ARGS__); \ + } while (0) # endif #else @@ -33,7 +46,32 @@ * or, just wrap pr_debug */ # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ + client->monc.auth->global_id, __func__, ##__VA_ARGS__) #endif +#define pr_notice_client(client, fmt, ...) 
\ + pr_notice("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_info_client(client, fmt, ...) \ + pr_info("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_client(client, fmt, ...) \ + pr_warn("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_once_client(client, fmt, ...) \ + pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_client(client, fmt, ...) \ + pr_err("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_ratelimited_client(client, fmt, ...) \ + pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_ratelimited_client(client, fmt, ...) \ + pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) + #endif -- cgit v1.2.3 From 197b7d792d6aead2e30d4b2c054ffabae2ed73dc Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 9 Jun 2023 15:15:47 +0800 Subject: ceph: pass the mdsc to several helpers We will use the 'mdsc' to get the global_id in the following commits. 
Link: https://tracker.ceph.com/issues/61590 Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 15 +++++++++------ fs/ceph/debugfs.c | 4 ++-- fs/ceph/dir.c | 2 +- fs/ceph/file.c | 2 +- fs/ceph/mds_client.c | 39 ++++++++++++++++++++++----------------- fs/ceph/mds_client.h | 3 ++- fs/ceph/mdsmap.c | 3 ++- fs/ceph/snap.c | 16 ++++++++++------ fs/ceph/super.h | 3 ++- include/linux/ceph/mdsmap.h | 5 ++++- 10 files changed, 55 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 14215ec646f7..c00f15b773f0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1178,7 +1178,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } } -void ceph_remove_cap(struct ceph_cap *cap, bool queue_release) +void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release) { struct ceph_inode_info *ci = cap->ci; struct ceph_fs_client *fsc; @@ -1342,6 +1343,8 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) */ void __ceph_remove_caps(struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct rb_node *p; /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) @@ -1351,7 +1354,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); } spin_unlock(&ci->i_ceph_lock); } @@ -3999,7 +4002,7 @@ retry: goto out_unlock; if (target < 0) { - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4034,7 +4037,7 @@ retry: change_auth_cap_ses(ci, tcap->session); } } - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } else if (tsession) { /* add placeholder for the export tagert */ @@ 
-4051,7 +4054,7 @@ retry: spin_unlock(&mdsc->cap_dirty_lock); } - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4164,7 +4167,7 @@ retry: ocap->mseq, mds, le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } *old_issued = issued; diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3904333fa6c3..2f1e7498cd74 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -81,7 +81,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -100,7 +100,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 854cbdd66661..fff5cb2df9a8 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1226,7 +1226,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &base, 0); /* mark error on parent + clear complete */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b5f8038065d7..7c4d79a23506 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -574,7 +574,7 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &base, 0); pr_warn("async create failure path=(%llx)%s 
result=%d!\n", diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 293b93182955..284ba087c507 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2126,6 +2126,7 @@ out: */ static int trim_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); int *remaining = arg; struct ceph_inode_info *ci = ceph_inode(inode); int used, wanted, oissued, mine; @@ -2173,7 +2174,7 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) if (oissued) { /* we aren't the only cap.. just remove us */ - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); (*remaining)--; } else { struct dentry *dentry; @@ -2588,6 +2589,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) /** * ceph_mdsc_build_path - build a path string to a given dentry + * @mdsc: mds client * @dentry: dentry to which path should be built * @plen: returned length of string * @pbase: returned base inode number @@ -2607,8 +2609,8 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) * Encode hidden .snap dirs as a double /, i.e. 
* foo/.snap/bar -> foo//bar */ -char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, - int for_wire) +char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + int *plen, u64 *pbase, int for_wire) { struct dentry *cur; struct inode *inode; @@ -2726,9 +2728,9 @@ retry: return path + pos; } -static int build_dentry_path(struct dentry *dentry, struct inode *dir, - const char **ppath, int *ppathlen, u64 *pino, - bool *pfreepath, bool parent_locked) +static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + struct inode *dir, const char **ppath, int *ppathlen, + u64 *pino, bool *pfreepath, bool parent_locked) { char *path; @@ -2744,7 +2746,7 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, return 0; } rcu_read_unlock(); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; @@ -2756,6 +2758,7 @@ static int build_inode_path(struct inode *inode, const char **ppath, int *ppathlen, u64 *pino, bool *pfreepath) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct dentry *dentry; char *path; @@ -2765,7 +2768,7 @@ static int build_inode_path(struct inode *inode, return 0; } dentry = d_find_alias(inode); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); dput(dentry); if (IS_ERR(path)) return PTR_ERR(path); @@ -2778,10 +2781,11 @@ static int build_inode_path(struct inode *inode, * request arguments may be specified via an inode *, a dentry *, or * an explicit ino+path. 
*/ -static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, - struct inode *rdiri, const char *rpath, - u64 rino, const char **ppath, int *pathlen, - u64 *ino, bool *freepath, bool parent_locked) +static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode, + struct dentry *rdentry, struct inode *rdiri, + const char *rpath, u64 rino, const char **ppath, + int *pathlen, u64 *ino, bool *freepath, + bool parent_locked) { int r = 0; @@ -2790,7 +2794,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, + r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, freepath, parent_locked); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); @@ -2877,7 +2881,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, &session->s_features); - ret = set_request_path_attr(req->r_inode, req->r_dentry, + ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, &path1, &pathlen1, &ino1, &freepath1, test_bit(CEPH_MDS_R_PARENT_LOCKED, @@ -2891,7 +2895,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (req->r_old_dentry && !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) old_dentry = req->r_old_dentry; - ret = set_request_path_attr(NULL, old_dentry, + ret = set_request_path_attr(mdsc, NULL, old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2, true); @@ -4290,6 +4294,7 @@ out_unlock: */ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); union { struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; @@ 
-4307,7 +4312,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) dentry = d_find_primary(inode); if (dentry) { /* set pathbase to parent dir when msg_version >= 2 */ - path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, recon_state->msg_version >= 2); dput(dentry); if (IS_ERR(path)) { @@ -5662,7 +5667,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; } - newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client)); + newmap = ceph_mdsmap_decode(mdsc, &p, end, ceph_msgr2(mdsc->fsc->client)); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad_unlock; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5a3714bdd64a..d930eb79dc38 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -581,7 +581,8 @@ static inline void ceph_mdsc_free_path(char *path, int len) __putname(path - (PATH_MAX - 1 - len)); } -extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, +extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, + struct dentry *dentry, int *plen, u64 *base, int for_wire); extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 7dac21ee6ce7..6cbec7aed5a0 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -114,7 +114,8 @@ bad: * Ignore any fields we don't care about (there are quite a few of * them). */ -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2) { struct ceph_mdsmap *m; const void *start = *p; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 813f21add992..55090e6c9967 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -329,7 +329,8 @@ static int cmpu64_rev(const void *a, const void *b) /* * build the snap context for a given realm. 
*/ -static int build_snap_context(struct ceph_snap_realm *realm, +static int build_snap_context(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *realm_queue, struct list_head *dirty_realms) { @@ -425,7 +426,8 @@ fail: /* * rebuild snap context for the given realm and all of its children. */ -static void rebuild_snap_realms(struct ceph_snap_realm *realm, +static void rebuild_snap_realms(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *dirty_realms) { LIST_HEAD(realm_queue); @@ -451,7 +453,8 @@ static void rebuild_snap_realms(struct ceph_snap_realm *realm, continue; } - last = build_snap_context(_realm, &realm_queue, dirty_realms); + last = build_snap_context(mdsc, _realm, &realm_queue, + dirty_realms); dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm, last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); @@ -708,7 +711,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, * Queue cap_snaps for snap writeback for this realm and its children. * Called under snap_rwsem, so realm topology won't change. 
*/ -static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) +static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm) { struct ceph_inode_info *ci; struct inode *lastinode = NULL; @@ -855,7 +859,7 @@ more: /* rebuild_snapcs when we reach the _end_ (root) of the trace */ if (realm_to_rebuild && p >= e) - rebuild_snap_realms(realm_to_rebuild, &dirty_realms); + rebuild_snap_realms(mdsc, realm_to_rebuild, &dirty_realms); if (!first_realm) first_realm = realm; @@ -873,7 +877,7 @@ more: realm = list_first_entry(&dirty_realms, struct ceph_snap_realm, dirty_item); list_del_init(&realm->dirty_item); - queue_realm_cap_snaps(realm); + queue_realm_cap_snaps(mdsc, realm); } if (realm_ret) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 51c7f2b14f6f..09c262dd5bd3 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1223,7 +1223,8 @@ extern void ceph_add_cap(struct inode *inode, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); -extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release); +extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release); extern void __ceph_remove_caps(struct ceph_inode_info *ci); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c3e0648dc27..89f1931f1ba6 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -5,6 +5,8 @@ #include #include +struct ceph_mds_client; + /* * mds map - describe servers in the mds cluster. 
* @@ -65,7 +67,8 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) } extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); -- cgit v1.2.3 From 522dc5108f07ef30e2c7399e59b9547d382308ff Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 12 Jun 2023 15:41:10 +0800 Subject: libceph, ceph: move mdsmap.h to fs/ceph The mdsmap.h is only used by CephFS, so move it to fs/ceph. Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.h | 2 +- fs/ceph/mdsmap.c | 2 +- fs/ceph/mdsmap.h | 75 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/ceph/mdsmap.h | 75 --------------------------------------------- 4 files changed, 77 insertions(+), 77 deletions(-) create mode 100644 fs/ceph/mdsmap.h delete mode 100644 include/linux/ceph/mdsmap.h (limited to 'include') diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d930eb79dc38..df9e3d668969 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -14,9 +14,9 @@ #include #include -#include #include +#include "mdsmap.h" #include "metric.h" #include "super.h" diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index fa080183ac1f..fae97c25ce58 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -7,10 +7,10 @@ #include #include -#include #include #include +#include "mdsmap.h" #include "mds_client.h" #include "super.h" diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h new file mode 100644 index 000000000000..89f1931f1ba6 --- /dev/null +++ b/fs/ceph/mdsmap.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _FS_CEPH_MDSMAP_H +#define _FS_CEPH_MDSMAP_H + +#include +#include + +struct ceph_mds_client; + +/* + * mds map - 
describe servers in the mds cluster. + * + * we limit fields to those the client actually xcares about + */ +struct ceph_mds_info { + u64 global_id; + struct ceph_entity_addr addr; + s32 state; + int num_export_targets; + bool laggy; + u32 *export_targets; +}; + +struct ceph_mdsmap { + u32 m_epoch, m_client_epoch, m_last_failure; + u32 m_root; + u32 m_session_timeout; /* seconds */ + u32 m_session_autoclose; /* seconds */ + u64 m_max_file_size; + u64 m_max_xattr_size; /* maximum size for xattrs blob */ + u32 m_max_mds; /* expected up:active mds number */ + u32 m_num_active_mds; /* actual up:active mds number */ + u32 possible_max_rank; /* possible max rank index */ + struct ceph_mds_info *m_info; + + /* which object pools file data can be stored in */ + int m_num_data_pg_pools; + u64 *m_data_pg_pools; + u64 m_cas_pg_pool; + + bool m_enabled; + bool m_damaged; + int m_num_laggy; +}; + +static inline struct ceph_entity_addr * +ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) +{ + if (w >= m->possible_max_rank) + return NULL; + return &m->m_info[w].addr; +} + +static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) +{ + BUG_ON(w < 0); + if (w >= m->possible_max_rank) + return CEPH_MDS_STATE_DNE; + return m->m_info[w].state; +} + +static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) +{ + if (w >= 0 && w < m->possible_max_rank) + return m->m_info[w].laggy; + return false; +} + +extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2); +extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); +extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); + +#endif diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h deleted file mode 100644 index 89f1931f1ba6..000000000000 --- a/include/linux/ceph/mdsmap.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _FS_CEPH_MDSMAP_H 
-#define _FS_CEPH_MDSMAP_H - -#include -#include - -struct ceph_mds_client; - -/* - * mds map - describe servers in the mds cluster. - * - * we limit fields to those the client actually xcares about - */ -struct ceph_mds_info { - u64 global_id; - struct ceph_entity_addr addr; - s32 state; - int num_export_targets; - bool laggy; - u32 *export_targets; -}; - -struct ceph_mdsmap { - u32 m_epoch, m_client_epoch, m_last_failure; - u32 m_root; - u32 m_session_timeout; /* seconds */ - u32 m_session_autoclose; /* seconds */ - u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ - u32 m_max_mds; /* expected up:active mds number */ - u32 m_num_active_mds; /* actual up:active mds number */ - u32 possible_max_rank; /* possible max rank index */ - struct ceph_mds_info *m_info; - - /* which object pools file data can be stored in */ - int m_num_data_pg_pools; - u64 *m_data_pg_pools; - u64 m_cas_pg_pool; - - bool m_enabled; - bool m_damaged; - int m_num_laggy; -}; - -static inline struct ceph_entity_addr * -ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) -{ - if (w >= m->possible_max_rank) - return NULL; - return &m->m_info[w].addr; -} - -static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) -{ - BUG_ON(w < 0); - if (w >= m->possible_max_rank) - return CEPH_MDS_STATE_DNE; - return m->m_info[w].state; -} - -static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) -{ - if (w >= 0 && w < m->possible_max_rank) - return m->m_info[w].laggy; - return false; -} - -extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, - void *end, bool msgr2); -extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); -extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); - -#endif -- cgit v1.2.3 From 1b90344614cc5949666328b37f03edec1d4e2873 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 7 Aug 2023 15:26:15 +0200 Subject: fs: 
export mnt_idmap_get/mnt_idmap_put These helpers are required to support idmapped mounts in CephFS. Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Ilya Dryomov --- fs/mnt_idmapping.c | 2 ++ include/linux/mnt_idmapping.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 4905665c47d0..57d1dedf3f8f 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -256,6 +256,7 @@ struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) return idmap; } +EXPORT_SYMBOL_GPL(mnt_idmap_get); /** * mnt_idmap_put - put a reference to an idmapping @@ -271,3 +272,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap) kfree(idmap); } } +EXPORT_SYMBOL_GPL(mnt_idmap_put); diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 057c89867aa2..b8da2db4ecd2 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) int vfsgid_in_group_p(vfsgid_t vfsgid); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); + vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); -- cgit v1.2.3 From 5ccd8530dd7ba97531a50ffa11eabe258d65a7af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:17 +0200 Subject: ceph: handle idmapped mounts in create_request_message() Inode operations that create a new filesystem object such as ->mknod, ->create, ->mkdir() and others don't take a {g,u}id argument explicitly. Instead the caller's fs{g,u}id is used for the {g,u}id of the new filesystem object. In order to ensure that the correct {g,u}id is used map the caller's fs{g,u}id for creation requests. This doesn't require complex changes. It suffices to pass in the relevant idmapping recorded in the request message. 
If this request message was triggered from an inode operation that creates filesystem objects it will have passed down the relevant idmapping. If this is a request message that was triggered from an inode operation that doesn't need to take idmappings into account the initial idmapping is passed down which is an identity mapping. This change uses a new cephfs protocol extension CEPHFS_FEATURE_HAS_OWNER_UIDGID which adds two new fields (owner_{u,g}id) to the request head structure. So, we need to ensure that MDS supports it otherwise we need to fail any IO that comes through an idmapped mount because we can't process it in a proper way. MDS server without such an extension will use caller_{u,g}id fields to set a new inode owner UID/GID which is incorrect because caller_{u,g}id values are unmapped. At the same time we can't map these fields with an idmapping as it can break UID/GID-based permission checks logic on the MDS side. This problem was described with a lot of details at [1], [2]. [1] https://lore.kernel.org/lkml/CAEivzxfw1fHO2TFA4dx3u23ZKK6Q+EThfzuibrhA3RKM=ZOYLg@mail.gmail.com/ [2] https://lore.kernel.org/all/20220104140414.155198-3-brauner@kernel.org/ Link: https://github.com/ceph/ceph/pull/52575 Link: https://tracker.ceph.com/issues/62217 Co-Developed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 56 ++++++++++++++++++++++++++++++++++++++++---- fs/ceph/mds_client.h | 5 +++- include/linux/ceph/ceph_fs.h | 10 +++++++- 3 files changed, 65 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ff5a05346f52..b97a16df7580 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2881,6 +2881,17 @@ static void encode_mclientrequest_tail(void **p, } } +static inline u16 mds_supported_head_version(struct ceph_mds_session *session) +{ + if 
(!test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, &session->s_features)) + return 1; + + if (!test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) + return 2; + + return CEPH_MDS_REQUEST_HEAD_VERSION; +} + static struct ceph_mds_request_head_legacy * find_legacy_request_head(void *p, u64 features) { @@ -2902,6 +2913,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_request_head_legacy *lhead; const char *path1 = NULL; @@ -2915,8 +2927,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, void *p, *end; int ret; bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); - bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, - &session->s_features); + u16 request_head_version = mds_supported_head_version(session); ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, @@ -2957,8 +2968,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, */ if (legacy) len = sizeof(struct ceph_mds_request_head_legacy); - else if (old_version) + else if (request_head_version == 1) len = sizeof(struct ceph_mds_request_head_old); + else if (request_head_version == 2) + len = offsetofend(struct ceph_mds_request_head, ext_num_fwd); else len = sizeof(struct ceph_mds_request_head); @@ -3008,6 +3021,18 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead = find_legacy_request_head(msg->front.iov_base, session->s_con.peer_features); + if ((req->r_mnt_idmap != &nop_mnt_idmap) && + !test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) { + WARN_ON_ONCE(!IS_CEPH_MDS_OP_NEWINODE(req->r_op)); + + pr_err_ratelimited_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by 
MDS. Fail request with -EIO.\n"); + + ret = -EIO; + goto out_err; + } + /* * The ceph_mds_request_head_legacy didn't contain a version field, and * one was added when we moved the message version from 3->4. @@ -3015,17 +3040,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (legacy) { msg->hdr.version = cpu_to_le16(3); p = msg->front.iov_base + sizeof(*lhead); - } else if (old_version) { + } else if (request_head_version == 1) { struct ceph_mds_request_head_old *ohead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(4); ohead->version = cpu_to_le16(1); p = msg->front.iov_base + sizeof(*ohead); + } else if (request_head_version == 2) { + struct ceph_mds_request_head *nhead = msg->front.iov_base; + + msg->hdr.version = cpu_to_le16(6); + nhead->version = cpu_to_le16(2); + + p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, ext_num_fwd); } else { struct ceph_mds_request_head *nhead = msg->front.iov_base; + kuid_t owner_fsuid; + kgid_t owner_fsgid; msg->hdr.version = cpu_to_le16(6); nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); + nhead->struct_len = cpu_to_le32(sizeof(struct ceph_mds_request_head)); + + if (IS_CEPH_MDS_OP_NEWINODE(req->r_op)) { + owner_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns, + VFSUIDT_INIT(req->r_cred->fsuid)); + owner_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns, + VFSGIDT_INIT(req->r_cred->fsgid)); + nhead->owner_uid = cpu_to_le32(from_kuid(&init_user_ns, owner_fsuid)); + nhead->owner_gid = cpu_to_le32(from_kgid(&init_user_ns, owner_fsgid)); + } else { + nhead->owner_uid = cpu_to_le32(-1); + nhead->owner_gid = cpu_to_le32(-1); + } + p = msg->front.iov_base + sizeof(*nhead); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index eb5e76446046..7c942294e96b 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -33,8 +33,10 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, CEPHFS_FEATURE_OP_GETVXATTR, 
CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_NEW_SNAPREALM_INFO, + CEPHFS_FEATURE_HAS_OWNER_UIDGID, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -49,6 +51,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \ + CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ } /* diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f3b3593254b9..ee1d0e5f9789 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,6 +357,11 @@ enum { CEPH_MDS_OP_RENAMESNAP = 0x01403, }; +#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ + op == CEPH_MDS_OP_MKNOD || \ + op == CEPH_MDS_OP_MKDIR || \ + op == CEPH_MDS_OP_SYMLINK) + extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_MODE (1 << 0) @@ -497,7 +502,7 @@ struct ceph_mds_request_head_legacy { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 2 +#define CEPH_MDS_REQUEST_HEAD_VERSION 3 struct ceph_mds_request_head_old { __le16 version; /* struct version */ @@ -528,6 +533,9 @@ struct ceph_mds_request_head { __le32 ext_num_retry; /* new count retry attempts */ __le32 ext_num_fwd; /* new count fwd attempts */ + + __le32 struct_len; /* to store size of struct ceph_mds_request_head */ + __le32 owner_uid, owner_gid; /* used for OPs which create inodes */ } __attribute__ ((packed)); /* cap/lease release record */ -- cgit v1.2.3