summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/block/rbd.c6
-rw-r--r--fs/ceph/addr.c59
-rw-r--r--fs/ceph/caps.c38
-rw-r--r--fs/ceph/dir.c79
-rw-r--r--fs/ceph/file.c123
-rw-r--r--fs/ceph/inode.c13
-rw-r--r--fs/ceph/mds_client.c165
-rw-r--r--fs/ceph/mds_client.h13
-rw-r--r--fs/ceph/mdsmap.c22
-rw-r--r--fs/ceph/super.c19
-rw-r--r--fs/ceph/super.h31
-rw-r--r--fs/ceph/xattr.c12
-rw-r--r--fs/crypto/fname.c36
-rw-r--r--fs/crypto/fscrypt_private.h9
-rw-r--r--fs/crypto/hooks.c6
-rw-r--r--fs/crypto/policy.c35
-rw-r--r--fs/dcache.c15
-rw-r--r--fs/inode.c10
-rw-r--r--include/linux/ceph/ceph_fs.h8
-rw-r--r--include/linux/ceph/mdsmap.h1
-rw-r--r--include/linux/ceph/osd_client.h5
-rw-r--r--include/linux/dcache.h2
-rw-r--r--include/linux/fscrypt.h5
-rw-r--r--include/linux/mmdebug.h10
-rw-r--r--net/ceph/osd_client.c15
-rw-r--r--net/ceph/osdmap.c32
-rw-r--r--net/ceph/pagelist.c2
27 files changed, 538 insertions, 233 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 0d8ec2fe5740..f9e39301c4af 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1297,7 +1297,7 @@ static void rbd_osd_submit(struct ceph_osd_request *osd_req)
dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n",
__func__, osd_req, obj_req, obj_req->ex.oe_objno,
obj_req->ex.oe_off, obj_req->ex.oe_len);
- ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
+ ceph_osdc_start_request(osd_req->r_osdc, osd_req);
}
/*
@@ -2081,7 +2081,7 @@ static int rbd_object_map_update(struct rbd_obj_request *obj_req, u64 snap_id,
if (ret)
return ret;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
return 0;
}
@@ -4768,7 +4768,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
if (ret)
goto out_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0)
ceph_copy_from_page_vector(pages, buf, 0, ret);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 2c3a9b5b4b74..dcf701b05cc1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -122,7 +122,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
* Reference snap context in folio->private. Also set
* PagePrivate so that we get invalidate_folio callback.
*/
- VM_BUG_ON_FOLIO(folio_test_private(folio), folio);
+ VM_WARN_ON_FOLIO(folio->private, folio);
folio_attach_private(folio, snapc);
return ceph_fscache_dirty_folio(mapping, folio);
@@ -237,7 +237,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (err >= 0 && err < subreq->len)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- netfs_subreq_terminated(subreq, err, true);
+ netfs_subreq_terminated(subreq, err, false);
num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
ceph_put_page_vector(osd_data->pages, num_pages, false);
@@ -313,8 +313,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
int err = 0;
u64 len = subreq->len;
- if (ci->i_inline_version != CEPH_INLINE_NONE &&
- ceph_netfs_issue_op_inline(subreq))
+ if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return;
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
@@ -338,6 +337,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
/* should always give us a page-aligned read */
WARN_ON_ONCE(page_off);
len = err;
+ err = 0;
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
req->r_callback = finish_netfs_read;
@@ -345,9 +345,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
req->r_inode = inode;
ihold(inode);
- err = ceph_osdc_start_request(req->r_osdc, req, false);
- if (err)
- iput(inode);
+ ceph_osdc_start_request(req->r_osdc, req);
out:
ceph_osdc_put_request(req);
if (err)
@@ -621,9 +619,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
req->r_mtime = inode->i_mtime;
- err = ceph_osdc_start_request(osdc, req, true);
- if (!err)
- err = ceph_osdc_wait_request(osdc, req);
+ ceph_osdc_start_request(osdc, req);
+ err = ceph_osdc_wait_request(osdc, req);
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, err);
@@ -1151,8 +1148,7 @@ new_request:
}
req->r_mtime = inode->i_mtime;
- rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
- BUG_ON(rc);
+ ceph_osdc_start_request(&fsc->client->osdc, req);
req = NULL;
wbc->nr_to_write -= i;
@@ -1327,16 +1323,13 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
int r;
r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, &folio, NULL);
- if (r == 0)
- folio_wait_fscache(folio);
- if (r < 0) {
- if (folio)
- folio_put(folio);
- } else {
- WARN_ON_ONCE(!folio_test_locked(folio));
- *pagep = &folio->page;
- }
- return r;
+ if (r < 0)
+ return r;
+
+ folio_wait_fscache(folio);
+ WARN_ON_ONCE(!folio_test_locked(folio));
+ *pagep = &folio->page;
+ return 0;
}
/*
@@ -1439,7 +1432,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
inode, off, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
- ci->i_inline_version == CEPH_INLINE_NONE) {
+ !ceph_has_inline_data(ci)) {
CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
@@ -1696,9 +1689,8 @@ int ceph_uninline_data(struct file *file)
}
req->r_mtime = inode->i_mtime;
- err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
- if (!err)
- err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_osdc_start_request(&fsc->client->osdc, req);
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
ceph_osdc_put_request(req);
if (err < 0)
goto out_unlock;
@@ -1739,9 +1731,8 @@ int ceph_uninline_data(struct file *file)
}
req->r_mtime = inode->i_mtime;
- err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
- if (!err)
- err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_osdc_start_request(&fsc->client->osdc, req);
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, err);
@@ -1912,15 +1903,13 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE,
0, false, true);
- err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
+ ceph_osdc_start_request(&fsc->client->osdc, rd_req);
wr_req->r_mtime = ci->netfs.inode.i_mtime;
- err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
+ ceph_osdc_start_request(&fsc->client->osdc, wr_req);
- if (!err)
- err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req);
- if (!err2)
- err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req);
+ err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req);
+ err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req);
if (err >= 0 || err == -ENOENT)
have |= POOL_READ;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ac8fd5e7f540..53cfe026b3ea 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -602,8 +602,8 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
* @ci: inode to be moved
* @session: new auth caps session
*/
-static void change_auth_cap_ses(struct ceph_inode_info *ci,
- struct ceph_mds_session *session)
+void change_auth_cap_ses(struct ceph_inode_info *ci,
+ struct ceph_mds_session *session)
{
lockdep_assert_held(&ci->i_ceph_lock);
@@ -1978,14 +1978,15 @@ retry:
}
dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s"
- " issued %s revoking %s retain %s %s%s\n", ceph_vinop(inode),
+ " issued %s revoking %s retain %s %s%s%s\n", ceph_vinop(inode),
ceph_cap_string(file_wanted),
ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
ceph_cap_string(ci->i_flushing_caps),
ceph_cap_string(issued), ceph_cap_string(revoking),
ceph_cap_string(retain),
(flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
- (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");
+ (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
+ (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
/*
* If we no longer need to hold onto old our caps, and we may
@@ -3005,7 +3006,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
}
if (S_ISREG(ci->netfs.inode.i_mode) &&
- ci->i_inline_version != CEPH_INLINE_NONE &&
+ ceph_has_inline_data(ci) &&
(_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
i_size_read(inode) > 0) {
struct page *page =
@@ -3578,24 +3579,23 @@ static void handle_cap_grant(struct inode *inode,
fill_inline = true;
}
- if (ci->i_auth_cap == cap &&
- le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
- if (newcaps & ~extra_info->issued)
- wake = true;
+ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
+ if (ci->i_auth_cap == cap) {
+ if (newcaps & ~extra_info->issued)
+ wake = true;
- if (ci->i_requested_max_size > max_size ||
- !(le32_to_cpu(grant->wanted) & CEPH_CAP_ANY_FILE_WR)) {
- /* re-request max_size if necessary */
- ci->i_requested_max_size = 0;
- wake = true;
- }
+ if (ci->i_requested_max_size > max_size ||
+ !(le32_to_cpu(grant->wanted) & CEPH_CAP_ANY_FILE_WR)) {
+ /* re-request max_size if necessary */
+ ci->i_requested_max_size = 0;
+ wake = true;
+ }
- ceph_kick_flushing_inode_caps(session, ci);
- spin_unlock(&ci->i_ceph_lock);
+ ceph_kick_flushing_inode_caps(session, ci);
+ }
up_read(&session->s_mdsc->snap_rwsem);
- } else {
- spin_unlock(&ci->i_ceph_lock);
}
+ spin_unlock(&ci->i_ceph_lock);
if (fill_inline)
ceph_fill_inline_data(inode, NULL, extra_info->inline_data,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index eae417d71136..e7e2ebac330d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -856,6 +856,10 @@ static int ceph_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
+ err = ceph_wait_on_conflict_unlink(dentry);
+ if (err)
+ return err;
+
if (ceph_quota_is_max_files_exceeded(dir)) {
err = -EDQUOT;
goto out;
@@ -918,6 +922,10 @@ static int ceph_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
+ err = ceph_wait_on_conflict_unlink(dentry);
+ if (err)
+ return err;
+
if (ceph_quota_is_max_files_exceeded(dir)) {
err = -EDQUOT;
goto out;
@@ -968,9 +976,13 @@ static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
struct ceph_acl_sec_ctx as_ctx = {};
- int err = -EROFS;
+ int err;
int op;
+ err = ceph_wait_on_conflict_unlink(dentry);
+ if (err)
+ return err;
+
if (ceph_snap(dir) == CEPH_SNAPDIR) {
/* mkdir .snap/foo is a MKSNAP */
op = CEPH_MDS_OP_MKSNAP;
@@ -980,6 +992,7 @@ static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
op = CEPH_MDS_OP_MKDIR;
} else {
+ err = -EROFS;
goto out;
}
@@ -1037,6 +1050,10 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
struct ceph_mds_request *req;
int err;
+ err = ceph_wait_on_conflict_unlink(dentry);
+ if (err)
+ return err;
+
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
@@ -1071,9 +1088,27 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
+ struct dentry *dentry = req->r_dentry;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
int result = req->r_err ? req->r_err :
le32_to_cpu(req->r_reply_info.head->result);
+ if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags))
+ pr_warn("%s dentry %p:%pd async unlink bit is not set\n",
+ __func__, dentry, dentry);
+
+ spin_lock(&fsc->async_unlink_conflict_lock);
+ hash_del_rcu(&di->hnode);
+ spin_unlock(&fsc->async_unlink_conflict_lock);
+
+ spin_lock(&dentry->d_lock);
+ di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK;
+ wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT);
+ spin_unlock(&dentry->d_lock);
+
+ synchronize_rcu();
+
if (result == -EJUKEBOX)
goto out;
@@ -1081,7 +1116,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
if (result) {
int pathlen = 0;
u64 base = 0;
- char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
+ char *path = ceph_mdsc_build_path(dentry, &pathlen,
&base, 0);
/* mark error on parent + clear complete */
@@ -1089,13 +1124,13 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
ceph_dir_clear_complete(req->r_parent);
/* drop the dentry -- we don't know its status */
- if (!d_unhashed(req->r_dentry))
- d_drop(req->r_dentry);
+ if (!d_unhashed(dentry))
+ d_drop(dentry);
/* mark inode itself for an error (since metadata is bogus) */
mapping_set_error(req->r_old_inode->i_mapping, result);
- pr_warn("ceph: async unlink failure path=(%llx)%s result=%d!\n",
+ pr_warn("async unlink failure path=(%llx)%s result=%d!\n",
base, IS_ERR(path) ? "<<bad>>" : path, result);
ceph_mdsc_free_path(path, pathlen);
}
@@ -1180,6 +1215,8 @@ retry:
if (try_async && op == CEPH_MDS_OP_UNLINK &&
(req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) {
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir),
dentry->d_name.len, dentry->d_name.name,
ceph_cap_string(req->r_dir_caps));
@@ -1187,6 +1224,16 @@ retry:
req->r_callback = ceph_async_unlink_cb;
req->r_old_inode = d_inode(dentry);
ihold(req->r_old_inode);
+
+ spin_lock(&dentry->d_lock);
+ di->flags |= CEPH_DENTRY_ASYNC_UNLINK;
+ spin_unlock(&dentry->d_lock);
+
+ spin_lock(&fsc->async_unlink_conflict_lock);
+ hash_add_rcu(fsc->async_unlink_conflict, &di->hnode,
+ dentry->d_name.hash);
+ spin_unlock(&fsc->async_unlink_conflict_lock);
+
err = ceph_mdsc_submit_request(mdsc, dir, req);
if (!err) {
/*
@@ -1195,10 +1242,20 @@ retry:
*/
drop_nlink(inode);
d_delete(dentry);
- } else if (err == -EJUKEBOX) {
- try_async = false;
- ceph_mdsc_put_request(req);
- goto retry;
+ } else {
+ spin_lock(&fsc->async_unlink_conflict_lock);
+ hash_del_rcu(&di->hnode);
+ spin_unlock(&fsc->async_unlink_conflict_lock);
+
+ spin_lock(&dentry->d_lock);
+ di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK;
+ spin_unlock(&dentry->d_lock);
+
+ if (err == -EJUKEBOX) {
+ try_async = false;
+ ceph_mdsc_put_request(req);
+ goto retry;
+ }
}
} else {
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
@@ -1237,6 +1294,10 @@ static int ceph_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
(!ceph_quota_is_same_realm(old_dir, new_dir)))
return -EXDEV;
+ err = ceph_wait_on_conflict_unlink(new_dentry);
+ if (err)
+ return err;
+
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 284d2fda663d..04fd34557de8 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -240,8 +240,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
INIT_LIST_HEAD(&fi->rw_contexts);
fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
- if ((file->f_mode & FMODE_WRITE) &&
- ci->i_inline_version != CEPH_INLINE_NONE) {
+ if ((file->f_mode & FMODE_WRITE) && ceph_has_inline_data(ci)) {
ret = ceph_uninline_data(file);
if (ret < 0)
goto error;
@@ -568,7 +567,7 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
&base, 0);
- pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n",
+ pr_warn("async create failure path=(%llx)%s result=%d!\n",
base, IS_ERR(path) ? "<<bad>>" : path, result);
ceph_mdsc_free_path(path, pathlen);
@@ -611,6 +610,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
struct ceph_mds_reply_inode in = { };
struct ceph_mds_reply_info_in iinfo = { .in = &in };
struct ceph_inode_info *ci = ceph_inode(dir);
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
struct inode *inode;
struct timespec64 now;
struct ceph_string *pool_ns;
@@ -709,6 +709,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
file->f_mode |= FMODE_CREATED;
ret = finish_open(file, dentry, ceph_open);
}
+
+ spin_lock(&dentry->d_lock);
+ di->flags &= ~CEPH_DENTRY_ASYNC_CREATE;
+ wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT);
+ spin_unlock(&dentry->d_lock);
+
return ret;
}
@@ -735,6 +741,15 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
if (dentry->d_name.len > NAME_MAX)
return -ENAMETOOLONG;
+ err = ceph_wait_on_conflict_unlink(dentry);
+ if (err)
+ return err;
+ /*
+ * Do not truncate the file, since atomic_open is called before the
+ * permission check. The caller will do the truncation afterward.
+ */
+ flags &= ~O_TRUNC;
+
if (flags & O_CREAT) {
if (ceph_quota_is_max_files_exceeded(dir))
return -EDQUOT;
@@ -781,9 +796,16 @@ retry:
(req->r_dir_caps =
try_prep_async_create(dir, dentry, &lo,
&req->r_deleg_ino))) {
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
req->r_args.open.flags |= cpu_to_le32(CEPH_O_EXCL);
req->r_callback = ceph_async_create_cb;
+
+ spin_lock(&dentry->d_lock);
+ di->flags |= CEPH_DENTRY_ASYNC_CREATE;
+ spin_unlock(&dentry->d_lock);
+
err = ceph_mdsc_submit_request(mdsc, dir, req);
if (!err) {
err = ceph_finish_async_create(dir, dentry,
@@ -802,9 +824,7 @@ retry:
}
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
- err = ceph_mdsc_do_request(mdsc,
- (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
- req);
+ err = ceph_mdsc_do_request(mdsc, (flags & O_CREAT) ? dir : NULL, req);
if (err == -ENOENT) {
dentry = ceph_handle_snapdir(req, dentry);
if (IS_ERR(dentry)) {
@@ -960,9 +980,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
false, false);
- ret = ceph_osdc_start_request(osdc, req, false);
- if (!ret)
- ret = ceph_osdc_wait_request(osdc, req);
+ ceph_osdc_start_request(osdc, req);
+ ret = ceph_osdc_wait_request(osdc, req);
ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
@@ -1225,7 +1244,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
req->r_inode = inode;
req->r_priv = aio_req;
- ret = ceph_osdc_start_request(req->r_osdc, req, false);
+ ceph_osdc_start_request(req->r_osdc, req);
out:
if (ret < 0) {
req->r_result = ret;
@@ -1362,9 +1381,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
continue;
}
- ret = ceph_osdc_start_request(req->r_osdc, req, false);
- if (!ret)
- ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_osdc_start_request(req->r_osdc, req);
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
if (write)
ceph_update_write_metrics(metric, req->r_start_latency,
@@ -1427,8 +1445,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
r_private_item);
list_del_init(&req->r_private_item);
if (ret >= 0)
- ret = ceph_osdc_start_request(req->r_osdc,
- req, false);
+ ceph_osdc_start_request(req->r_osdc, req);
if (ret < 0) {
req->r_result = ret;
ceph_aio_complete_req(req);
@@ -1541,9 +1558,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
false, true);
req->r_mtime = mtime;
- ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
- if (!ret)
- ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_osdc_start_request(&fsc->client->osdc, req);
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, ret);
@@ -1627,7 +1643,7 @@ again:
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
- if (ci->i_inline_version == CEPH_INLINE_NONE) {
+ if (!ceph_has_inline_data(ci)) {
if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
ret = ceph_direct_read_write(iocb, to,
NULL, NULL);
@@ -1890,7 +1906,7 @@ retry_snap:
if (dirty)
__mark_inode_dirty(inode, dirty);
if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
- ceph_check_caps(ci, 0, NULL);
+ ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
}
dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
@@ -1930,57 +1946,15 @@ out_unlocked:
*/
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
- struct inode *inode = file->f_mapping->host;
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- loff_t i_size;
- loff_t ret;
-
- inode_lock(inode);
-
if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
+ struct inode *inode = file_inode(file);
+ int ret;
+
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
if (ret < 0)
- goto out;
- }
-
- i_size = i_size_read(inode);
- switch (whence) {
- case SEEK_END:
- offset += i_size;
- break;
- case SEEK_CUR:
- /*
- * Here we special-case the lseek(fd, 0, SEEK_CUR)
- * position-querying operation. Avoid rewriting the "same"
- * f_pos value back to the file because a concurrent read(),
- * write() or lseek() might have altered it
- */
- if (offset == 0) {
- ret = file->f_pos;
- goto out;
- }
- offset += file->f_pos;
- break;
- case SEEK_DATA:
- if (offset < 0 || offset >= i_size) {
- ret = -ENXIO;
- goto out;
- }
- break;
- case SEEK_HOLE:
- if (offset < 0 || offset >= i_size) {
- ret = -ENXIO;
- goto out;
- }
- offset = i_size;
- break;
+ return ret;
}
-
- ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size));
-
-out:
- inode_unlock(inode);
- return ret;
+ return generic_file_llseek(file, offset, whence);
}
static inline void ceph_zero_partial_page(
@@ -2049,12 +2023,10 @@ static int ceph_zero_partial_object(struct inode *inode,
}
req->r_mtime = inode->i_mtime;
- ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
- if (!ret) {
- ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
- if (ret == -ENOENT)
- ret = 0;
- }
+ ceph_osdc_start_request(&fsc->client->osdc, req);
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if (ret == -ENOENT)
+ ret = 0;
ceph_osdc_put_request(req);
out:
@@ -2356,7 +2328,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
if (IS_ERR(req))
ret = PTR_ERR(req);
else {
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
ceph_update_copyfrom_metrics(&fsc->mdsc->metric,
req->r_start_latency,
@@ -2549,7 +2521,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
/* Let the MDS know about dst file size change */
if (ceph_inode_set_size(dst_inode, dst_off) ||
ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
- ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY, NULL);
+ ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_FLUSH,
+ NULL);
}
/* Mark Fw dirty */
spin_lock(&dst_ci->i_ceph_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 56c53ab3618e..42351d7a0dd6 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1049,7 +1049,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
iinfo->inline_version >= ci->i_inline_version) {
int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
ci->i_inline_version = iinfo->inline_version;
- if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ if (ceph_has_inline_data(ci) &&
(locked_page || (info_caps & cache_caps)))
fill_inline = true;
}
@@ -2275,9 +2275,15 @@ int ceph_try_to_choose_auth_mds(struct inode *inode, int mask)
*
* This cost much when doing the Locker state transition and
* usually will need to revoke caps from clients.
+ *
+ * And for the 'Xs' caps for getxattr we will also choose the
+ * auth MDS, because the MDS side code is buggy due to setxattr
+ * won't notify the replica MDSes when the values changed and
+ * the replica MDS will return the old values. Though we will
+ * fix it in MDS code, but this still makes sense for old ceph.
*/
if (((mask & CEPH_CAP_ANY_SHARED) && (issued & CEPH_CAP_ANY_EXCL))
- || (mask & CEPH_STAT_RSTAT))
+ || (mask & (CEPH_STAT_RSTAT | CEPH_STAT_CAP_XATTR)))
return USE_AUTH_MDS;
else
return USE_ANY_MDS;
@@ -2321,7 +2327,8 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
if (inline_version == 0) {
/* the reply is supposed to contain inline data */
err = -EINVAL;
- } else if (inline_version == CEPH_INLINE_NONE) {
+ } else if (inline_version == CEPH_INLINE_NONE ||
+ inline_version == 1) {
err = -ENODATA;
} else {
err = req->r_reply_info.targeti.inline_len;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 33f517d549ce..80f8b9ec1a31 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -456,7 +456,7 @@ static int ceph_parse_deleg_inos(void **p, void *end,
dout("added delegated inode 0x%llx\n",
start - 1);
} else if (err == -EBUSY) {
- pr_warn("ceph: MDS delegated inode 0x%llx more than once.\n",
+ pr_warn("MDS delegated inode 0x%llx more than once.\n",
start - 1);
} else {
return err;
@@ -655,6 +655,79 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
}
+/*
+ * In async unlink case the kclient won't wait for the first reply
+ * from MDS and just drop all the links and unhash the dentry and then
+ * succeeds immediately.
+ *
+ * For any new create/link/rename,etc requests followed by using the
+ * same file names we must wait for the first reply of the inflight
+ * unlink request, or the MDS possibly will fail these following
+ * requests with -EEXIST if the inflight async unlink request was
+ * delayed for some reasons.
+ *
+ * And the worst case is that for the none async openc request it will
+ * successfully open the file if the CDentry hasn't been unlinked yet,
+ * but later the previous delayed async unlink request will remove the
+ * CDenty. That means the just created file is possiblly deleted later
+ * by accident.
+ *
+ * We need to wait for the inflight async unlink requests to finish
+ * when creating new files/directories by using the same file names.
+ */
+int ceph_wait_on_conflict_unlink(struct dentry *dentry)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct dentry *pdentry = dentry->d_parent;
+ struct dentry *udentry, *found = NULL;
+ struct ceph_dentry_info *di;
+ struct qstr dname;
+ u32 hash = dentry->d_name.hash;
+ int err;
+
+ dname.name = dentry->d_name.name;
+ dname.len = dentry->d_name.len;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(fsc->async_unlink_conflict, di,
+ hnode, hash) {
+ udentry = di->dentry;
+
+ spin_lock(&udentry->d_lock);
+ if (udentry->d_name.hash != hash)
+ goto next;
+ if (unlikely(udentry->d_parent != pdentry))
+ goto next;
+ if (!hash_hashed(&di->hnode))
+ goto next;
+
+ if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags))
+ pr_warn("%s dentry %p:%pd async unlink bit is not set\n",
+ __func__, dentry, dentry);
+
+ if (!d_same_name(udentry, pdentry, &dname))
+ goto next;
+
+ spin_unlock(&udentry->d_lock);
+ found = dget(udentry);
+ break;
+next:
+ spin_unlock(&udentry->d_lock);
+ }
+ rcu_read_unlock();
+
+ if (likely(!found))
+ return 0;
+
+ dout("%s dentry %p:%pd conflict with old %p:%pd\n", __func__,
+ dentry, dentry, found, found);
+
+ err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT,
+ TASK_KILLABLE);
+ dput(found);
+ return err;
+}
+
/*
* sessions
@@ -1220,14 +1293,17 @@ static int encode_supported_features(void **p, void *end)
if (count > 0) {
size_t i;
size_t size = FEATURE_BYTES(count);
+ unsigned long bit;
if (WARN_ON_ONCE(*p + 4 + size > end))
return -ERANGE;
ceph_encode_32(p, size);
memset(*p, 0, size);
- for (i = 0; i < count; i++)
- ((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8);
+ for (i = 0; i < count; i++) {
+ bit = feature_bits[i];
+ ((unsigned char *)(*p))[bit / 8] |= BIT(bit % 8);
+ }
*p += size;
} else {
if (WARN_ON_ONCE(*p + 4 > end))
@@ -2884,6 +2960,64 @@ static void __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
+ /*
+ * For async create we will choose the auth MDS of frag in parent
+ * directory to send the request and ususally this works fine, but
+ * if the migrated the dirtory to another MDS before it could handle
+ * it the request will be forwarded.
+ *
+ * And then the auth cap will be changed.
+ */
+ if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags) && req->r_num_fwd) {
+ struct ceph_dentry_info *di = ceph_dentry(req->r_dentry);
+ struct ceph_inode_info *ci;
+ struct ceph_cap *cap;
+
+ /*
+ * The request maybe handled very fast and the new inode
+ * hasn't been linked to the dentry yet. We need to wait
+ * for the ceph_finish_async_create(), which shouldn't be
+ * stuck too long or fail in thoery, to finish when forwarding
+ * the request.
+ */
+ if (!d_inode(req->r_dentry)) {
+ err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT,
+ TASK_KILLABLE);
+ if (err) {
+ mutex_lock(&req->r_fill_mutex);
+ set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+ mutex_unlock(&req->r_fill_mutex);
+ goto out_session;
+ }
+ }
+
+ ci = ceph_inode(d_inode(req->r_dentry));
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) {
+ dout("do_request session changed for auth cap %d -> %d\n",
+ cap->session->s_mds, session->s_mds);
+
+ /* Remove the auth cap from old session */
+ spin_lock(&cap->session->s_cap_lock);
+ cap->session->s_nr_caps--;
+ list_del_init(&cap->session_caps);
+ spin_unlock(&cap->session->s_cap_lock);
+
+ /* Add the auth cap to the new session */
+ cap->mds = mds;
+ cap->session = session;
+ spin_lock(&session->s_cap_lock);
+ session->s_nr_caps++;
+ list_add_tail(&cap->session_caps, &session->s_caps);
+ spin_unlock(&session->s_cap_lock);
+
+ change_auth_cap_ses(ci, session);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ }
+
err = __send_request(session, req, false);
out_session:
@@ -3464,11 +3598,26 @@ static void handle_session(struct ceph_mds_session *session,
case CEPH_SESSION_OPEN:
if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
pr_info("mds%d reconnect success\n", session->s_mds);
- session->s_state = CEPH_MDS_SESSION_OPEN;
- session->s_features = features;
- renewed_caps(mdsc, session, 0);
- if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
- metric_schedule_delayed(&mdsc->metric);
+
+ if (session->s_state == CEPH_MDS_SESSION_OPEN) {
+ pr_notice("mds%d is already opened\n", session->s_mds);
+ } else {
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ session->s_features = features;
+ renewed_caps(mdsc, session, 0);
+ if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT,
+ &session->s_features))
+ metric_schedule_delayed(&mdsc->metric);
+ }
+
+ /*
+ * The connection maybe broken and the session in client
+ * side has been reinitialized, need to update the seq
+ * anyway.
+ */
+ if (!session->s_seq && seq)
+ session->s_seq = seq;
+
wake = 1;
if (mdsc->stopping)
__close_session(mdsc, session);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 1140aecd82ce..256e3eada6c1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -29,14 +29,12 @@ enum ceph_feature_type {
CEPHFS_FEATURE_MULTI_RECONNECT,
CEPHFS_FEATURE_DELEG_INO,
CEPHFS_FEATURE_METRIC_COLLECT,
+ CEPHFS_FEATURE_ALTERNATE_NAME,
+ CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
- CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
};
-/*
- * This will always have the highest feature bit value
- * as the last element of the array.
- */
#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
0, 1, 2, 3, 4, 5, 6, 7, \
CEPHFS_FEATURE_MIMIC, \
@@ -45,10 +43,8 @@ enum ceph_feature_type {
CEPHFS_FEATURE_MULTI_RECONNECT, \
CEPHFS_FEATURE_DELEG_INO, \
CEPHFS_FEATURE_METRIC_COLLECT, \
- \
- CEPHFS_FEATURE_MAX, \
+ CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \
}
-#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
/*
* Some lock dependencies:
@@ -582,6 +578,7 @@ static inline int ceph_wait_on_async_create(struct inode *inode)
TASK_KILLABLE);
}
+extern int ceph_wait_on_conflict_unlink(struct dentry *dentry);
extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session);
extern int ceph_restore_deleg_ino(struct ceph_mds_session *session, u64 ino);
#endif
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 30387733765d..8d0a6d2c2da4 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -352,12 +352,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
__decode_and_drop_type(p, end, u8, bad_ext);
}
if (mdsmap_ev >= 8) {
- u32 name_len;
/* enabled */
ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
- ceph_decode_32_safe(p, end, name_len, bad_ext);
- ceph_decode_need(p, end, name_len, bad_ext);
- *p += name_len;
+ /* fs_name */
+ ceph_decode_skip_string(p, end, bad_ext);
}
/* damaged */
if (mdsmap_ev >= 9) {
@@ -370,6 +368,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
} else {
m->m_damaged = false;
}
+ if (mdsmap_ev >= 17) {
+ /* balancer */
+ ceph_decode_skip_string(p, end, bad_ext);
+ /* standby_count_wanted */
+ ceph_decode_skip_32(p, end, bad_ext);
+ /* old_max_mds */
+ ceph_decode_skip_32(p, end, bad_ext);
+ /* min_compat_client */
+ ceph_decode_skip_8(p, end, bad_ext);
+ /* required_client_features */
+ ceph_decode_skip_set(p, end, 64, bad_ext);
+ ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
+ } else {
+ /* This forces the usage of the (sync) SETXATTR Op */
+ m->m_max_xattr_size = 0;
+ }
bad_ext:
dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
!!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 40140805bdcf..3fc48b43cab0 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -72,15 +72,9 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
/*
- * express utilization in terms of large blocks to avoid
+ * Express utilization in terms of large blocks to avoid
* overflow on 32-bit machines.
- *
- * NOTE: for the time being, we make bsize == frsize to humor
- * not-yet-ancient versions of glibc that are broken.
- * Someday, we will probably want to report a real block
- * size... whatever that may mean for a network file system!
*/
- buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
/*
@@ -95,6 +89,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
}
+ /*
+ * NOTE: for the time being, we make bsize == frsize to humor
+ * not-yet-ancient versions of glibc that are broken.
+ * Someday, we will probably want to report a real block
+ * size... whatever that may mean for a network file system!
+ */
+ buf->f_bsize = buf->f_frsize;
+
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
buf->f_namelen = NAME_MAX;
@@ -816,6 +818,9 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
if (!fsc->cap_wq)
goto fail_inode_wq;
+ hash_init(fsc->async_unlink_conflict);
+ spin_lock_init(&fsc->async_unlink_conflict_lock);
+
spin_lock(&ceph_fsc_lock);
list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
spin_unlock(&ceph_fsc_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f59dac66955b..40630e6f691c 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -19,6 +19,7 @@
#include <linux/security.h>
#include <linux/netfs.h>
#include <linux/fscache.h>
+#include <linux/hashtable.h>
#include <linux/ceph/libceph.h>
@@ -99,6 +100,8 @@ struct ceph_mount_options {
char *mon_addr;
};
+#define CEPH_ASYNC_CREATE_CONFLICT_BITS 8
+
struct ceph_fs_client {
struct super_block *sb;
@@ -124,6 +127,9 @@ struct ceph_fs_client {
struct workqueue_struct *inode_wq;
struct workqueue_struct *cap_wq;
+ DECLARE_HASHTABLE(async_unlink_conflict, CEPH_ASYNC_CREATE_CONFLICT_BITS);
+ spinlock_t async_unlink_conflict_lock;
+
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_dentry_lru, *debugfs_caps;
struct dentry *debugfs_congestion_kb;
@@ -280,7 +286,8 @@ struct ceph_dentry_info {
struct dentry *dentry;
struct ceph_mds_session *lease_session;
struct list_head lease_list;
- unsigned flags;
+ struct hlist_node hnode;
+ unsigned long flags;
int lease_shared_gen;
u32 lease_gen;
u32 lease_seq;
@@ -289,10 +296,14 @@ struct ceph_dentry_info {
u64 offset;
};
-#define CEPH_DENTRY_REFERENCED 1
-#define CEPH_DENTRY_LEASE_LIST 2
-#define CEPH_DENTRY_SHRINK_LIST 4
-#define CEPH_DENTRY_PRIMARY_LINK 8
+#define CEPH_DENTRY_REFERENCED (1 << 0)
+#define CEPH_DENTRY_LEASE_LIST (1 << 1)
+#define CEPH_DENTRY_SHRINK_LIST (1 << 2)
+#define CEPH_DENTRY_PRIMARY_LINK (1 << 3)
+#define CEPH_DENTRY_ASYNC_UNLINK_BIT (4)
+#define CEPH_DENTRY_ASYNC_UNLINK (1 << CEPH_DENTRY_ASYNC_UNLINK_BIT)
+#define CEPH_DENTRY_ASYNC_CREATE_BIT (5)
+#define CEPH_DENTRY_ASYNC_CREATE (1 << CEPH_DENTRY_ASYNC_CREATE_BIT)
struct ceph_inode_xattrs_info {
/*
@@ -758,6 +769,8 @@ extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
extern void ceph_reservation_status(struct ceph_fs_client *client,
int *total, int *avail, int *used,
int *reserved, int *min);
+extern void change_auth_cap_ses(struct ceph_inode_info *ci,
+ struct ceph_mds_session *session);
@@ -1218,6 +1231,14 @@ extern int ceph_pool_perm_check(struct inode *inode, int need);
extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate);
+static inline bool ceph_has_inline_data(struct ceph_inode_info *ci)
+{
+ if (ci->i_inline_version == CEPH_INLINE_NONE ||
+ ci->i_inline_version == 1) /* initial version, no data */
+ return false;
+ return true;
+}
+
/* file.c */
extern const struct file_operations ceph_file_fops;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f141f5246163..f31350cda960 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
flags |= CEPH_XATTR_REMOVE;
}
- dout("setxattr value=%.*s\n", (int)size, value);
+ dout("setxattr value size: %zu\n", size);
/* do request */
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
spin_lock(&ci->i_ceph_lock);
retry:
issued = __ceph_caps_issued(ci, NULL);
- if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
+ required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+ if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
+ (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) {
+ dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
+ __func__, ci->i_xattrs.version, required_blob_size,
+ mdsc->mdsmap->m_max_xattr_size);
goto do_sync;
+ }
if (!lock_snap_rwsem && !ci->i_head_snapc) {
lock_snap_rwsem = true;
@@ -1201,8 +1207,6 @@ retry:
ceph_cap_string(issued));
__build_xattrs(inode);
- required_blob_size = __get_required_blob_size(ci, name_len, val_len);
-
if (!ci->i_xattrs.prealloc_blob ||
required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
struct ceph_buffer *blob;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 14e0ef5e9a20..12bd61d20f69 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -86,7 +86,8 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
/**
* fscrypt_fname_encrypt() - encrypt a filename
* @inode: inode of the parent directory (for regular filenames)
- * or of the symlink (for symlink targets)
+ * or of the symlink (for symlink targets). Key must already be
+ * set up.
* @iname: the filename to encrypt
* @out: (output) the encrypted filename
* @olen: size of the encrypted filename. It must be at least @iname->len.
@@ -137,6 +138,7 @@ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname,
return 0;
}
+EXPORT_SYMBOL_GPL(fscrypt_fname_encrypt);
/**
* fname_decrypt() - decrypt a filename
@@ -264,9 +266,9 @@ static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst)
return bp - dst;
}
-bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
- u32 orig_len, u32 max_len,
- u32 *encrypted_len_ret)
+bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
+ u32 orig_len, u32 max_len,
+ u32 *encrypted_len_ret)
{
int padding = 4 << (fscrypt_policy_flags(policy) &
FSCRYPT_POLICY_FLAGS_PAD_MASK);
@@ -281,6 +283,29 @@ bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
}
/**
+ * fscrypt_fname_encrypted_size() - calculate length of encrypted filename
+ * @inode: parent inode of dentry name being encrypted. Key must
+ * already be set up.
+ * @orig_len: length of the original filename
+ * @max_len: maximum length to return
+ * @encrypted_len_ret: where calculated length should be returned (on success)
+ *
+ * Filenames that are shorter than the maximum length may have their lengths
+ * increased slightly by encryption, due to padding that is applied.
+ *
+ * Return: false if the orig_len is greater than max_len. Otherwise, true and
+ * fill out encrypted_len_ret with the length (up to max_len).
+ */
+bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
+ u32 max_len, u32 *encrypted_len_ret)
+{
+ return __fscrypt_fname_encrypted_size(&inode->i_crypt_info->ci_policy,
+ orig_len, max_len,
+ encrypted_len_ret);
+}
+EXPORT_SYMBOL_GPL(fscrypt_fname_encrypted_size);
+
+/**
* fscrypt_fname_alloc_buffer() - allocate a buffer for presented filenames
* @max_encrypted_len: maximum length of encrypted filenames the buffer will be
* used to present
@@ -435,8 +460,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
return ret;
if (fscrypt_has_encryption_key(dir)) {
- if (!fscrypt_fname_encrypted_size(&dir->i_crypt_info->ci_policy,
- iname->len, NAME_MAX,
+ if (!fscrypt_fname_encrypted_size(dir, iname->len, NAME_MAX,
&fname->crypto_buf.len))
return -ENAMETOOLONG;
fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index f5be777d8279..3afdaa084773 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -297,14 +297,11 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
const struct fscrypt_info *ci);
/* fname.c */
-int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname,
- u8 *out, unsigned int olen);
-bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
- u32 orig_len, u32 max_len,
- u32 *encrypted_len_ret);
+bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
+ u32 orig_len, u32 max_len,
+ u32 *encrypted_len_ret);
/* hkdf.c */
-
struct fscrypt_hkdf {
struct crypto_shash *hmac_tfm;
};
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index af74599ae1cf..7c01025879b3 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -228,9 +228,9 @@ int fscrypt_prepare_symlink(struct inode *dir, const char *target,
* counting it (even though it is meaningless for ciphertext) is simpler
* for now since filesystems will assume it is there and subtract it.
*/
- if (!fscrypt_fname_encrypted_size(policy, len,
- max_len - sizeof(struct fscrypt_symlink_data),
- &disk_link->len))
+ if (!__fscrypt_fname_encrypted_size(policy, len,
+ max_len - sizeof(struct fscrypt_symlink_data),
+ &disk_link->len))
return -ENAMETOOLONG;
disk_link->len += sizeof(struct fscrypt_symlink_data);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 8a054e6d1e68..80b8ca0f340b 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -694,6 +694,32 @@ const union fscrypt_policy *fscrypt_policy_to_inherit(struct inode *dir)
}
/**
+ * fscrypt_context_for_new_inode() - create an encryption context for a new inode
+ * @ctx: where context should be written
+ * @inode: inode from which to fetch policy and nonce
+ *
+ * Given an in-core "prepared" (via fscrypt_prepare_new_inode) inode,
+ * generate a new context and write it to ctx. ctx _must_ be at least
+ * FSCRYPT_SET_CONTEXT_MAX_SIZE bytes.
+ *
+ * Return: size of the resulting context or a negative error code.
+ */
+int fscrypt_context_for_new_inode(void *ctx, struct inode *inode)
+{
+ struct fscrypt_info *ci = inode->i_crypt_info;
+
+ BUILD_BUG_ON(sizeof(union fscrypt_context) !=
+ FSCRYPT_SET_CONTEXT_MAX_SIZE);
+
+ /* fscrypt_prepare_new_inode() should have set up the key already. */
+ if (WARN_ON_ONCE(!ci))
+ return -ENOKEY;
+
+ return fscrypt_new_context(ctx, &ci->ci_policy, ci->ci_nonce);
+}
+EXPORT_SYMBOL_GPL(fscrypt_context_for_new_inode);
+
+/**
* fscrypt_set_context() - Set the fscrypt context of a new inode
* @inode: a new inode
* @fs_data: private data given by FS and passed to ->set_context()
@@ -709,12 +735,9 @@ int fscrypt_set_context(struct inode *inode, void *fs_data)
union fscrypt_context ctx;
int ctxsize;
- /* fscrypt_prepare_new_inode() should have set up the key already. */
- if (WARN_ON_ONCE(!ci))
- return -ENOKEY;
-
- BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
- ctxsize = fscrypt_new_context(&ctx, &ci->ci_policy, ci->ci_nonce);
+ ctxsize = fscrypt_context_for_new_inode(&ctx, inode);
+ if (ctxsize < 0)
+ return ctxsize;
/*
* This may be the first time the inode number is available, so do any
diff --git a/fs/dcache.c b/fs/dcache.c
index ea5cdec24ea7..c5dc32a59c76 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2248,10 +2248,16 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
}
EXPORT_SYMBOL(d_add_ci);
-
-static inline bool d_same_name(const struct dentry *dentry,
- const struct dentry *parent,
- const struct qstr *name)
+/**
+ * d_same_name - compare dentry name with case-exact name
+ * @parent: parent dentry
+ * @dentry: the negative dentry that was passed to the parent's lookup func
+ * @name: the case-exact name to be associated with the returned dentry
+ *
+ * Return: true if names are same, or false
+ */
+bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
+ const struct qstr *name)
{
if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) {
if (dentry->d_name.len != name->len)
@@ -2262,6 +2268,7 @@ static inline bool d_same_name(const struct dentry *dentry,
dentry->d_name.len, dentry->d_name.name,
name) == 0;
}
+EXPORT_SYMBOL_GPL(d_same_name);
/**
* __d_lookup_rcu - search for a dentry (racy, store-free)
diff --git a/fs/inode.c b/fs/inode.c
index 9c3cd540c665..6462276dfdf0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -422,6 +422,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_io_list);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
+ INIT_LIST_HEAD(&inode->i_sb_list);
__address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
}
@@ -1021,7 +1022,6 @@ struct inode *new_inode_pseudo(struct super_block *sb)
spin_lock(&inode->i_lock);
inode->i_state = 0;
spin_unlock(&inode->i_lock);
- INIT_LIST_HEAD(&inode->i_sb_list);
}
return inode;
}
@@ -1165,7 +1165,6 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
{
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
struct inode *old;
- bool creating = inode->i_state & I_CREATING;
again:
spin_lock(&inode_hash_lock);
@@ -1199,7 +1198,12 @@ again:
inode->i_state |= I_NEW;
hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
- if (!creating)
+
+ /*
+ * Add inode to the sb list if it's not already. It has I_NEW at this
+ * point, so it should be safe to test i_sb_list locklessly.
+ */
+ if (list_empty(&inode->i_sb_list))
inode_sb_list_add(inode);
unlock:
spin_unlock(&inode_hash_lock);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 86bf82dbd8b8..49586ff26152 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -433,9 +433,9 @@ union ceph_mds_request_args {
__le32 stripe_unit; /* layout for newly created file */
__le32 stripe_count; /* ... */
__le32 object_size;
- __le32 file_replication;
- __le32 mask; /* CEPH_CAP_* */
- __le32 old_size;
+ __le32 pool;
+ __le32 mask; /* CEPH_CAP_* */
+ __le64 old_size;
} __attribute__ ((packed)) open;
struct {
__le32 flags;
@@ -768,7 +768,7 @@ struct ceph_mds_caps {
__le32 xattr_len;
__le64 xattr_version;
- /* filelock */
+ /* a union of non-export and export bodies. */
__le64 size, max_size, truncate_size;
__le32 truncate_seq;
struct ceph_timespec mtime, atime, ctime;
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 523fd0452856..4c3e0648dc27 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -25,6 +25,7 @@ struct ceph_mdsmap {
u32 m_session_timeout; /* seconds */
u32 m_session_autoclose; /* seconds */
u64 m_max_file_size;
+ u64 m_max_xattr_size; /* maximum size for xattrs blob */
u32 m_max_mds; /* expected up:active mds number */
u32 m_num_active_mds; /* actual up:active mds number */
u32 possible_max_rank; /* possible max rank index */
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index cba8a6ffc329..fb6be72104df 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -507,9 +507,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);
-extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req,
- bool nofail);
+void ceph_osdc_start_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c73e5e327e76..92c78ed02b54 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -233,6 +233,8 @@ extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
wait_queue_head_t *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
+extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
+ const struct qstr *name);
extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
extern struct dentry *d_find_any_alias(struct inode *inode);
extern struct dentry * d_obtain_alias(struct inode *);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index e60d57c99cb6..7d2f1e0f23b1 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -284,6 +284,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg);
int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg);
int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg);
int fscrypt_has_permitted_context(struct inode *parent, struct inode *child);
+int fscrypt_context_for_new_inode(void *ctx, struct inode *inode);
int fscrypt_set_context(struct inode *inode, void *fs_data);
struct fscrypt_dummy_policy {
@@ -327,6 +328,10 @@ void fscrypt_free_inode(struct inode *inode);
int fscrypt_drop_inode(struct inode *inode);
/* fname.c */
+int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname,
+ u8 *out, unsigned int olen);
+bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
+ u32 max_len, u32 *encrypted_len_ret);
int fscrypt_setup_filename(struct inode *inode, const struct qstr *iname,
int lookup, struct fscrypt_name *fname);
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index d7285f8148a3..15ae78cd2853 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -54,6 +54,15 @@ void dump_mm(const struct mm_struct *mm);
} \
unlikely(__ret_warn_once); \
})
+#define VM_WARN_ON_FOLIO(cond, folio) ({ \
+ int __ret_warn = !!(cond); \
+ \
+ if (unlikely(__ret_warn)) { \
+ dump_page(&folio->page, "VM_WARN_ON_FOLIO(" __stringify(cond)")");\
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn); \
+})
#define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \
static bool __section(".data.once") __warned; \
int __ret_warn_once = !!(cond); \
@@ -79,6 +88,7 @@ void dump_mm(const struct mm_struct *mm);
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 9d82bb42e958..87b883c7bfd6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4578,15 +4578,12 @@ bad:
/*
* Register request, send initial attempt.
*/
-int ceph_osdc_start_request(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req,
- bool nofail)
+void ceph_osdc_start_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
{
down_read(&osdc->lock);
submit_request(req, false);
up_read(&osdc->lock);
-
- return 0;
}
EXPORT_SYMBOL(ceph_osdc_start_request);
@@ -4756,7 +4753,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
linger_cancel(lreq);
linger_put(lreq);
ret = wait_request_timeout(req, opts->mount_timeout);
@@ -4827,7 +4824,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
out_put_req:
@@ -5043,7 +5040,7 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
void *p = page_address(pages[0]);
@@ -5120,7 +5117,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
ret = req->r_ops[0].rval;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 2823bb3cff55..295098873861 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -11,6 +11,22 @@
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>
+static __printf(2, 3)
+void osdmap_info(const struct ceph_osdmap *map, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_INFO "%s (%pU e%u): %pV", KBUILD_MODNAME, &map->fsid,
+ map->epoch, &vaf);
+
+ va_end(args);
+}
+
char *ceph_osdmap_state_str(char *str, int len, u32 state)
{
if (!len)
@@ -571,10 +587,10 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
goto bad;
#endif
r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
- c->rules[i] = r;
if (r == NULL)
goto badmem;
dout(" rule %d is at %p\n", i, r);
+ c->rules[i] = r;
r->len = yes;
ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
@@ -1566,7 +1582,7 @@ static int decode_new_primary_affinity(void **p, void *end,
if (ret)
return ret;
- pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
+ osdmap_info(map, "osd%d primary-affinity 0x%x\n", osd, aff);
}
return 0;
@@ -1864,9 +1880,9 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p);
w = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd);
- pr_info("osd%d weight 0x%x %s\n", osd, w,
- w == CEPH_OSD_IN ? "(in)" :
- (w == CEPH_OSD_OUT ? "(out)" : ""));
+ osdmap_info(map, "osd%d weight 0x%x %s\n", osd, w,
+ w == CEPH_OSD_IN ? "(in)" :
+ (w == CEPH_OSD_OUT ? "(out)" : ""));
map->osd_weight[osd] = w;
/*
@@ -1898,10 +1914,10 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
BUG_ON(osd >= map->max_osd);
if ((map->osd_state[osd] & CEPH_OSD_UP) &&
(xorstate & CEPH_OSD_UP))
- pr_info("osd%d down\n", osd);
+ osdmap_info(map, "osd%d down\n", osd);
if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
(xorstate & CEPH_OSD_EXISTS)) {
- pr_info("osd%d does not exist\n", osd);
+ osdmap_info(map, "osd%d does not exist\n", osd);
ret = set_primary_affinity(map, osd,
CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
if (ret)
@@ -1931,7 +1947,7 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
- pr_info("osd%d up\n", osd);
+ osdmap_info(map, "osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
}
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 65e34f78b05d..74622b278d57 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -96,7 +96,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
EXPORT_SYMBOL(ceph_pagelist_append);
/* Allocate enough pages for a pagelist to append the given amount
- * of data without without allocating.
+ * of data without allocating.
* Returns: 0 on success, -ENOMEM on error.
*/
int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)