summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ext3/balloc.c34
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext3/fsync.c15
-rw-r--r--fs/ext3/ialloc.c4
-rw-r--r--fs/ext3/inode.c56
-rw-r--r--fs/ext3/namei.c3
-rw-r--r--fs/ext3/super.c13
-rw-r--r--fs/ext3/xattr.c12
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/gfs2/glock.c9
-rw-r--r--fs/hfsplus/extents.c9
-rw-r--r--fs/hfsplus/super.c9
-rw-r--r--fs/hfsplus/unicode.c35
-rw-r--r--fs/hfsplus/wrapper.c9
-rw-r--r--fs/jbd/checkpoint.c4
-rw-r--r--fs/jbd/commit.c11
-rw-r--r--fs/jbd/journal.c4
-rw-r--r--fs/jbd/transaction.c10
-rw-r--r--fs/lockd/clntproc.c17
-rw-r--r--fs/nfs/callback_proc.c57
-rw-r--r--fs/nfs/client.c5
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4filelayout.c7
-rw-r--r--fs/nfs/nfs4filelayout.h11
-rw-r--r--fs/nfs/nfs4filelayoutdev.c452
-rw-r--r--fs/nfs/nfs4proc.c32
-rw-r--r--fs/nfs/nfs4state.c9
-rw-r--r--fs/nfs/nfs4xdr.c8
-rw-r--r--fs/nfs/pnfs.c13
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/pnfs_dev.c13
-rw-r--r--fs/nilfs2/btree.c39
32 files changed, 679 insertions, 228 deletions
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index fe52297e31ad..f7d111e499ad 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
+#include <trace/events/ext3.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
desc = ext3_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+ trace_ext3_read_block_bitmap(sb, block_group);
bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
struct rb_node * parent = NULL;
struct ext3_reserve_window_node *this;
+ trace_ext3_rsv_window_add(sb, rsv);
while (*p)
{
parent = *p;
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
rsv = &block_i->rsv_window_node;
if (!rsv_is_empty(&rsv->rsv_window)) {
spin_lock(rsv_lock);
- if (!rsv_is_empty(&rsv->rsv_window))
+ if (!rsv_is_empty(&rsv->rsv_window)) {
+ trace_ext3_discard_reservation(inode, rsv);
rsv_window_remove(inode->i_sb, rsv);
+ }
spin_unlock(rsv_lock);
}
}
@@ -683,14 +688,10 @@ error_return:
void ext3_free_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count)
{
- struct super_block * sb;
+ struct super_block *sb = inode->i_sb;
unsigned long dquot_freed_blocks;
- sb = inode->i_sb;
- if (!sb) {
- printk ("ext3_free_blocks: nonexistent device");
- return;
- }
+ trace_ext3_free_blocks(inode, block, count);
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (dquot_freed_blocks)
dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
else
start_block = grp_goal + group_first_block;
+ trace_ext3_alloc_new_reservation(sb, start_block);
size = my_rsv->rsv_goal_size;
if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
* check if the first free block is within the
* free space we just reserved
*/
- if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+ if (start_block >= my_rsv->rsv_start &&
+ start_block <= my_rsv->rsv_end) {
+ trace_ext3_reserved(sb, start_block, my_rsv);
return 0; /* success */
+ }
/*
* if the first free bit we found is out of the reservable space
* continue search for next reservable space,
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
*errp = -ENOSPC;
sb = inode->i_sb;
- if (!sb) {
- printk("ext3_new_block: nonexistent device");
- return 0;
- }
/*
* Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
return 0;
}
+ trace_ext3_request_blocks(inode, goal, num);
+
sbi = EXT3_SB(sb);
- es = EXT3_SB(sb)->s_es;
+ es = sbi->s_es;
ext3_debug("goal=%lu.\n", goal);
/*
* Allocate a block from reservation only when
@@ -1742,6 +1745,10 @@ allocated:
brelse(bitmap_bh);
dquot_free_block(inode, *count-num);
*count = num;
+
+ trace_ext3_allocate_blocks(inode, goal, num,
+ (unsigned long long)ret_block);
+
return ret_block;
io_error:
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
if ((next - start) < minblocks)
goto free_extent;
+ trace_ext3_discard_blocks(sb, discard_block, next - start);
/* Send the TRIM command down to the device */
err = sb_issue_discard(sb, discard_block, next - start,
GFP_NOFS, 0);
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index f55df0e61cbd..86c8ab343f6f 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
};
const struct inode_operations ext3_file_inode_operations = {
- .truncate = ext3_truncate,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb34c94..06a4394d2bc3 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
+#include <trace/events/ext3.h>
/*
* akpm: A new design for ext3_sync_file().
@@ -51,10 +52,13 @@ int ext3_sync_file(struct file *file, int datasync)
int ret, needs_barrier = 0;
tid_t commit_tid;
+ J_ASSERT(ext3_journal_current_handle() == NULL);
+
+ trace_ext3_sync_file_enter(file, datasync);
+
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
- J_ASSERT(ext3_journal_current_handle() == NULL);
/*
* data=writeback,ordered:
@@ -70,8 +74,10 @@ int ext3_sync_file(struct file *file, int datasync)
* (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure.
*/
- if (ext3_should_journal_data(inode))
- return ext3_force_commit(inode->i_sb);
+ if (ext3_should_journal_data(inode)) {
+ ret = ext3_force_commit(inode->i_sb);
+ goto out;
+ }
if (datasync)
commit_tid = atomic_read(&ei->i_datasync_tid);
@@ -91,5 +97,8 @@ int ext3_sync_file(struct file *file, int datasync)
*/
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+
+out:
+ trace_ext3_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bfc2dc43681d..bf09cbf938cc 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
+#include <trace/events/ext3.h>
#include <asm/byteorder.h>
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
ino = inode->i_ino;
ext3_debug ("freeing inode %lu\n", ino);
+ trace_ext3_free_inode(inode);
is_directory = S_ISDIR(inode->i_mode);
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
return ERR_PTR(-EPERM);
sb = dir->i_sb;
+ trace_ext3_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -601,6 +604,7 @@ got:
}
ext3_debug("allocating inode %lu\n", inode->i_ino);
+ trace_ext3_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23c3bae..d5b8c3fa0993 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,6 +38,7 @@
#include <linux/bio.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <trace/events/ext3.h>
#include "xattr.h"
#include "acl.h"
@@ -70,6 +71,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
might_sleep();
+ trace_ext3_forget(inode, is_metadata, blocknr);
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -198,6 +200,7 @@ void ext3_evict_inode (struct inode *inode)
handle_t *handle;
int want_delete = 0;
+ trace_ext3_evict_inode(inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
dquot_initialize(inode);
want_delete = 1;
@@ -231,12 +234,10 @@ void ext3_evict_inode (struct inode *inode)
if (inode->i_blocks)
ext3_truncate(inode);
/*
- * Kill off the orphan record which ext3_truncate created.
- * AKPM: I think this can be inside the above `if'.
- * Note that ext3_orphan_del() has to be able to cope with the
- * deletion of a non-existent orphan - this is because we don't
- * know if ext3_truncate() actually created an orphan record.
- * (Well, we could do this if we need to, but heck - it works)
+ * Kill off the orphan record created when the inode lost the last
+ * link. Note that ext3_orphan_del() has to be able to cope with the
+ * deletion of a non-existent orphan - ext3_truncate() could
+ * have removed the record.
*/
ext3_orphan_del(handle, inode);
EXT3_I(inode)->i_dtime = get_seconds();
@@ -842,6 +843,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
ext3_fsblk_t first_block = 0;
+ trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
J_ASSERT(handle != NULL || create == 0);
depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
@@ -886,6 +888,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
if (!create || err == -EIO)
goto cleanup;
+ /*
+ * Block out ext3_truncate while we alter the tree
+ */
mutex_lock(&ei->truncate_mutex);
/*
@@ -934,9 +939,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
*/
count = ext3_blks_to_allocate(partial, indirect_blks,
maxblocks, blocks_to_boundary);
- /*
- * Block out ext3_truncate while we alter the tree
- */
err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
offsets + (partial - chain), partial);
@@ -970,6 +972,9 @@ cleanup:
}
BUFFER_TRACE(bh_result, "returned");
out:
+ trace_ext3_get_blocks_exit(inode, iblock,
+ depth ? le32_to_cpu(chain[depth-1].key) : 0,
+ count, err);
return err;
}
@@ -1217,6 +1222,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
* we allocate blocks but write fails for some reason */
int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
+ trace_ext3_write_begin(inode, pos, len, flags);
+
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1332,6 +1339,7 @@ static int ext3_ordered_write_end(struct file *file,
unsigned from, to;
int ret = 0, ret2;
+ trace_ext3_ordered_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1375,7 @@ static int ext3_writeback_write_end(struct file *file,
struct inode *inode = file->f_mapping->host;
int ret;
+ trace_ext3_writeback_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
update_file_sizes(inode, pos, copied);
/*
@@ -1395,6 +1404,7 @@ static int ext3_journalled_write_end(struct file *file,
int partial = 0;
unsigned from, to;
+ trace_ext3_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1577,6 +1587,7 @@ static int ext3_ordered_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_ordered_writepage(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1647,6 +1658,7 @@ static int ext3_writeback_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_writeback_writepage(page);
if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1689,6 +1701,7 @@ static int ext3_journalled_writepage(struct page *page,
if (ext3_journal_current_handle())
goto no_write;
+ trace_ext3_journalled_writepage(page);
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -1739,6 +1752,7 @@ out_unlock:
static int ext3_readpage(struct file *file, struct page *page)
{
+ trace_ext3_readpage(page);
return mpage_readpage(page, ext3_get_block);
}
@@ -1753,6 +1767,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_invalidatepage(page, offset);
+
/*
* If it's a full truncate we just forget about the pending dirtying
*/
@@ -1766,6 +1782,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_releasepage(page);
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
@@ -1794,6 +1811,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_length(iov, nr_segs);
int retries = 0;
+ trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+
if (rw == WRITE) {
loff_t final_size = offset + count;
@@ -1842,7 +1861,7 @@ retry:
/* This is really bad luck. We've written the data
* but cannot extend i_size. Truncate allocated blocks
* and pretend the write failed... */
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
ret = PTR_ERR(handle);
goto out;
}
@@ -1868,6 +1887,8 @@ retry:
ret = err;
}
out:
+ trace_ext3_direct_IO_exit(inode, offset,
+ iov_length(iov, nr_segs), rw, ret);
return ret;
}
@@ -2391,8 +2412,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
int ext3_can_truncate(struct inode *inode)
{
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return 0;
if (S_ISREG(inode->i_mode))
return 1;
if (S_ISDIR(inode->i_mode))
@@ -2446,6 +2465,8 @@ void ext3_truncate(struct inode *inode)
unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;
+ trace_ext3_truncate_enter(inode);
+
if (!ext3_can_truncate(inode))
goto out_notrans;
@@ -2597,6 +2618,7 @@ out_stop:
ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
+ trace_ext3_truncate_exit(inode);
return;
out_notrans:
/*
@@ -2605,6 +2627,7 @@ out_notrans:
*/
if (inode->i_nlink)
ext3_orphan_del(NULL, inode);
+ trace_ext3_truncate_exit(inode);
}
static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2746,6 +2769,7 @@ make_io:
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
+ trace_ext3_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ_META, bh);
@@ -3236,9 +3260,12 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
- rc = vmtruncate(inode, attr->ia_size);
- if (rc)
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+ rc = -EPERM;
goto err_out;
+ }
+ truncate_setsize(inode, attr->ia_size);
+ ext3_truncate(inode);
}
setattr_copy(inode, attr);
@@ -3372,6 +3399,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
int err;
might_sleep();
+ trace_ext3_mark_inode_dirty(inode, _RET_IP_);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (!err)
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 34b6d9bfc48a..51736a4ff0cd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
+#include <trace/events/ext3.h>
#include "namei.h"
#include "xattr.h"
@@ -2144,6 +2145,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
struct ext3_dir_entry_2 * de;
handle_t *handle;
+ trace_ext3_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
dquot_initialize(dir);
@@ -2189,6 +2191,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
end_unlink:
ext3_journal_stop(handle);
brelse (bh);
+ trace_ext3_unlink_exit(dentry, retval);
return retval;
}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index aad153ef6b78..662290fb6fff 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@
#include "acl.h"
#include "namei.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext3.h>
+
#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
#else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
+static int ext3_drop_inode(struct inode *inode)
+{
+ int drop = generic_drop_inode(inode);
+
+ trace_ext3_drop_inode(inode, drop);
+ return drop;
+}
+
static void ext3_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = {
.destroy_inode = ext3_destroy_inode,
.write_inode = ext3_write_inode,
.dirty_inode = ext3_dirty_inode,
+ .drop_inode = ext3_drop_inode,
.evict_inode = ext3_evict_inode,
.put_super = ext3_put_super,
.sync_fs = ext3_sync_fs,
@@ -2507,6 +2519,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
{
tid_t target;
+ trace_ext3_sync_fs(sb, wait);
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 32e6cc23bd9a..d565759d82ee 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -803,8 +803,16 @@ inserted:
/* We need to allocate a new block */
ext3_fsblk_t goal = ext3_group_first_block_no(sb,
EXT3_I(inode)->i_block_group);
- ext3_fsblk_t block = ext3_new_block(handle, inode,
- goal, &error);
+ ext3_fsblk_t block;
+
+ /*
+ * Protect us agaist concurrent allocations to the
+ * same inode from ext3_..._writepage(). Reservation
+ * code does not expect racing allocations.
+ */
+ mutex_lock(&EXT3_I(inode)->truncate_mutex);
+ block = ext3_new_block(handle, inode, goal, &error);
+ mutex_unlock(&EXT3_I(inode)->truncate_mutex);
if (error)
goto cleanup;
ea_idebug(inode, "creating block %d", block);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7257752b6d5d..7018e1d8902d 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -102,7 +102,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
if (attr & ATTR_SYS)
inode->i_flags |= S_IMMUTABLE;
else
- inode->i_flags &= S_IMMUTABLE;
+ inode->i_flags &= ~S_IMMUTABLE;
}
fat_save_attrs(inode, attr);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2792a790e50b..1c1336e7b3b2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -663,14 +663,19 @@ static void glock_work_func(struct work_struct *work)
drop_ref = 1;
}
spin_lock(&gl->gl_spin);
- if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+ if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
gl->gl_state != LM_ST_UNLOCKED &&
gl->gl_demote_state != LM_ST_EXCLUSIVE) {
unsigned long holdtime, now = jiffies;
+
holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
- set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
+
+ if (!delay) {
+ clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
+ set_bit(GLF_DEMOTE, &gl->gl_flags);
+ }
}
run_queue(gl, 0);
spin_unlock(&gl->gl_spin);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index b1991a2a08e0..b9c1a4b5ba89 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -209,6 +209,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
int res = -EIO;
u32 ablock, dblock, mask;
+ sector_t sector;
int was_dirty = 0;
int shift;
@@ -255,10 +256,12 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
done:
dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n",
inode->i_ino, (long long)iblock, dblock);
+
mask = (1 << sbi->fs_shift) - 1;
- map_bh(bh_result, sb,
- (dblock << sbi->fs_shift) + sbi->blockoffset +
- (iblock & mask));
+ sector = ((sector_t)dblock << sbi->fs_shift) +
+ sbi->blockoffset + (iblock & mask);
+ map_bh(bh_result, sb, sector);
+
if (create) {
set_buffer_new(bh_result);
hip->phys_size += sb->s_blocksize;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b49b55584c84..0813af083576 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -393,6 +393,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi->rsrc_clump_blocks)
sbi->rsrc_clump_blocks = 1;
+ err = generic_check_addressable(sbi->alloc_blksz_shift,
+ sbi->total_blocks);
+ if (err) {
+ printk(KERN_ERR "hfs: filesystem size too large.\n");
+ goto out_free_vhdr;
+ }
+
/* Set up operations so we can load metadata */
sb->s_op = &hfsplus_sops;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -417,6 +424,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_RDONLY;
}
+ err = -EINVAL;
+
/* Load metadata objects (B*Trees) */
sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
if (!sbi->ext_tree) {
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index a3f0bfcc881e..a32998f29f0b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -142,7 +142,11 @@ int hfsplus_uni2asc(struct super_block *sb,
/* search for single decomposed char */
if (likely(compose))
ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
- if (ce1 && (cc = ce1[0])) {
+ if (ce1)
+ cc = ce1[0];
+ else
+ cc = 0;
+ if (cc) {
/* start of a possibly decomposed Hangul char */
if (cc != 0xffff)
goto done;
@@ -209,7 +213,8 @@ int hfsplus_uni2asc(struct super_block *sb,
i++;
ce2 = ce1;
}
- if ((cc = ce2[0])) {
+ cc = ce2[0];
+ if (cc) {
ip += i;
ustrlen -= i;
goto done;
@@ -301,7 +306,11 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
size = asc2unichar(sb, astr, len, &c);
- if (decompose && (dstr = decompose_unichar(c, &dsize))) {
+ if (decompose)
+ dstr = decompose_unichar(c, &dsize);
+ else
+ dstr = NULL;
+ if (dstr) {
if (outlen + dsize > HFSPLUS_MAX_STRLEN)
break;
do {
@@ -346,15 +355,23 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
astr += size;
len -= size;
- if (decompose && (dstr = decompose_unichar(c, &dsize))) {
+ if (decompose)
+ dstr = decompose_unichar(c, &dsize);
+ else
+ dstr = NULL;
+ if (dstr) {
do {
c2 = *dstr++;
- if (!casefold || (c2 = case_fold(c2)))
+ if (casefold)
+ c2 = case_fold(c2);
+ if (!casefold || c2)
hash = partial_name_hash(c2, hash);
} while (--dsize > 0);
} else {
c2 = c;
- if (!casefold || (c2 = case_fold(c2)))
+ if (casefold)
+ c2 = case_fold(c2);
+ if (!casefold || c2)
hash = partial_name_hash(c2, hash);
}
}
@@ -422,12 +439,14 @@ int hfsplus_compare_dentry(const struct dentry *parent,
c1 = *dstr1;
c2 = *dstr2;
if (casefold) {
- if (!(c1 = case_fold(c1))) {
+ c1 = case_fold(c1);
+ if (!c1) {
dstr1++;
dsize1--;
continue;
}
- if (!(c2 = case_fold(c2))) {
+ c2 = case_fold(c2);
+ if (!c2) {
dstr2++;
dsize2--;
continue;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 3031d81f5f0f..c946d3da095d 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -138,10 +138,6 @@ int hfsplus_read_wrapper(struct super_block *sb)
if (hfsplus_get_last_session(sb, &part_start, &part_size))
goto out;
- if ((u64)part_start + part_size > 0x100000000ULL) {
- pr_err("hfs: volumes larger than 2TB are not supported yet\n");
- goto out;
- }
error = -ENOMEM;
sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
@@ -169,8 +165,9 @@ reread:
if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
goto out_free_backup_vhdr;
wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
- part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
- part_size = wd.embed_count * wd.ablk_size;
+ part_start += (sector_t)wd.ablk_start +
+ (sector_t)wd.embed_start * wd.ablk_size;
+ part_size = (sector_t)wd.embed_count * wd.ablk_size;
goto reread;
default:
/*
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e4b87bc1fa56..dea7503b47e8 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -22,6 +22,7 @@
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <trace/events/jbd.h>
/*
* Unlink a buffer from a transaction checkpoint list.
@@ -358,6 +359,7 @@ int log_do_checkpoint(journal_t *journal)
* journal straight away.
*/
result = cleanup_journal_tail(journal);
+ trace_jbd_checkpoint(journal, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
@@ -503,6 +505,7 @@ int cleanup_journal_tail(journal_t *journal)
if (blocknr < journal->j_tail)
freed = freed + journal->j_last - journal->j_first;
+ trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
jbd_debug(1,
"Cleaning journal tail from %d to %d (offset %u), "
"freeing %u\n",
@@ -752,6 +755,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
+ trace_jbd_drop_transaction(journal, transaction);
jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
kfree(transaction);
}
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 72ffa974b0b8..eedd201374a8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -21,6 +21,7 @@
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <trace/events/jbd.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@@ -204,6 +205,8 @@ write_out_data:
if (!trylock_buffer(bh)) {
BUFFER_TRACE(bh, "needs blocking lock");
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal,
+ commit_transaction);
/* Write out all data to prevent deadlocks */
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
@@ -236,6 +239,8 @@ write_out_data:
jbd_unlock_bh_state(bh);
if (bufs == journal->j_wbufsize) {
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal,
+ commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
goto write_out_data;
@@ -266,6 +271,7 @@ write_out_data:
}
}
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal, commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
return err;
@@ -316,12 +322,14 @@ void journal_commit_transaction(journal_t *journal)
commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
+ trace_jbd_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
+ trace_jbd_commit_locking(journal, commit_transaction);
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
DEFINE_WAIT(wait);
@@ -392,6 +400,7 @@ void journal_commit_transaction(journal_t *journal)
*/
journal_switch_revoke_table(journal);
+ trace_jbd_commit_flushing(journal, commit_transaction);
commit_transaction->t_state = T_FLUSH;
journal->j_committing_transaction = commit_transaction;
journal->j_running_transaction = NULL;
@@ -493,6 +502,7 @@ void journal_commit_transaction(journal_t *journal)
commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock);
+ trace_jbd_commit_logging(journal, commit_transaction);
J_ASSERT(commit_transaction->t_nr_buffers <=
commit_transaction->t_outstanding_credits);
@@ -946,6 +956,7 @@ restart_loop:
}
spin_unlock(&journal->j_list_lock);
+ trace_jbd_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e2d4285fbe90..ab019ee77888 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -38,6 +38,9 @@
#include <linux/debugfs.h>
#include <linux/ratelimit.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/jbd.h>
+
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait)
} else
write_dirty_buffer(bh, WRITE);
+ trace_jbd_update_superblock_end(journal, wait);
out:
/* If we have just flushed the log (by marking s_start==0), then
* any future commit will have to be careful to update the
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f7ee81a065da..dc39efd05d54 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
static void __journal_temp_unlink_buffer(struct journal_head *jh);
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = kzalloc(sizeof(*new_transaction),
- GFP_NOFS|__GFP_NOFAIL);
+ new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
if (!new_transaction) {
- ret = -ENOMEM;
- goto out;
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto alloc_transaction;
}
}
@@ -844,8 +844,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
*/
JBUFFER_TRACE(jh, "cancelling revoke");
journal_cancel_revoke(handle, jh);
- journal_put_journal_head(jh);
out:
+ journal_put_journal_head(jh);
return err;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index adb45ec9038c..8392cb85bd54 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
/* We appear to be out of the grace period */
wake_up_all(&host->h_gracewait);
}
- dprintk("lockd: server returns status %d\n", resp->status);
+ dprintk("lockd: server returns status %d\n",
+ ntohl(resp->status));
return 0; /* Okay, call complete */
}
@@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
goto out;
if (resp->status != nlm_lck_denied_nolocks)
- printk("lockd: unexpected unlock status: %d\n", resp->status);
+ printk("lockd: unexpected unlock status: %d\n",
+ ntohl(resp->status));
/* What to do now? I'm out of my depth... */
status = -ENOLCK;
out:
@@ -708,7 +710,13 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
if (task->tk_status < 0) {
dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
- goto retry_rebind;
+ switch (task->tk_status) {
+ case -EACCES:
+ case -EIO:
+ goto die;
+ default:
+ goto retry_rebind;
+ }
}
if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
rpc_delay(task, NLMCLNT_GRACE_WAIT);
@@ -837,6 +845,7 @@ nlm_stat_to_errno(__be32 status)
return -ENOLCK;
#endif
}
- printk(KERN_NOTICE "lockd: unexpected server status %d\n", status);
+ printk(KERN_NOTICE "lockd: unexpected server status %d\n",
+ ntohl(status));
return -ENOLCK;
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954e9bb9..74780f9f852c 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
bool found = false;
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list);
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
- if (nfs_compare_fh(&args->cbl_fh,
- &NFS_I(lo->plh_inode)->fh))
- continue;
- ino = igrab(lo->plh_inode);
- if (!ino)
- continue;
- found = true;
- /* Without this, layout can be freed as soon
- * as we release cl_lock.
- */
- get_layout_hdr(lo);
- break;
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->plh_inode)->fh))
+ continue;
+ ino = igrab(lo->plh_inode);
+ if (!ino)
+ continue;
+ found = true;
+ /* Without this, layout can be freed as soon
+ * as we release cl_lock.
+ */
+ get_layout_hdr(lo);
+ break;
+ }
+ if (found)
+ break;
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
if (!found)
return NFS4ERR_NOMATCHING_LAYOUT;
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
};
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if ((args->cbl_recall_type == RETURN_FSID) &&
- memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
- &args->cbl_fsid, sizeof(struct nfs_fsid)))
- continue;
- if (!igrab(lo->plh_inode))
+ memcmp(&server->fsid, &args->cbl_fsid,
+ sizeof(struct nfs_fsid)))
continue;
- get_layout_hdr(lo);
- BUG_ON(!list_empty(&lo->plh_bulk_recall));
- list_add(&lo->plh_bulk_recall, &recall_list);
+
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!igrab(lo->plh_inode))
+ continue;
+ get_layout_hdr(lo);
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
list_for_each_entry_safe(lo, tmp,
&recall_list, plh_bulk_recall) {
ino = lo->plh_inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b3dc2b88b65b..5452ada59461 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
cred = rpc_lookup_machine_cred();
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
-#if defined(CONFIG_NFS_V4_1)
- INIT_LIST_HEAD(&clp->cl_layouts);
-#endif
nfs_fscache_get_client_cookie(clp);
return clp;
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
nfs4_deviceid_purge_client(clp);
kfree(clp->cl_hostname);
+ kfree(clp->server_scope);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
INIT_LIST_HEAD(&server->delegations);
+ INIT_LIST_HEAD(&server->layouts);
atomic_set(&server->active, 0);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c4a69833dd0d..b47f0d4710fa 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -48,6 +48,7 @@ enum nfs4_client_state {
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
NFS4CLNT_LEASE_CONFIRM,
+ NFS4CLNT_SERVER_SCOPE_MISMATCH,
};
enum nfs4_session_state {
@@ -349,6 +350,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+extern void nfs41_handle_server_scope(struct nfs_client *,
+ struct server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 426908809c97..b410a9b2d73a 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -343,8 +343,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s USE DS:ip %x %hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
/* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp;
@@ -383,9 +382,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ ds->ds_remotestr);
data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e3795e..68cce730b800 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,10 +47,17 @@ enum stripetype4 {
};
/* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+ struct sockaddr_storage da_addr;
+ size_t da_addrlen;
+ struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
+ char *da_remotestr; /* human readable addr+port */
+};
+
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
- u32 ds_ip_addr;
- u32 ds_port;
+ char *ds_remotestr; /* comma sep list of addrs */
+ struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
};
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf1377264..77c171e31316 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
printk("%s NULL device\n", __func__);
return;
}
- printk(" ip_addr %x port %hu\n"
+ printk(" ds %s\n"
" ref count %d\n"
" client %p\n"
" cl_exchange_flags %x\n",
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
-/* nfs4_ds_cache_lock is held */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
{
- struct nfs4_pnfs_ds *ds;
+ struct sockaddr_in *a, *b;
+ struct sockaddr_in6 *a6, *b6;
+
+ if (addr1->sa_family != addr2->sa_family)
+ return false;
+
+ switch (addr1->sa_family) {
+ case AF_INET:
+ a = (struct sockaddr_in *)addr1;
+ b = (struct sockaddr_in *)addr2;
+
+ if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+ a->sin_port == b->sin_port)
+ return true;
+ break;
+
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)addr1;
+ b6 = (struct sockaddr_in6 *)addr2;
+
+ /* LINKLOCAL addresses must have matching scope_id */
+ if (ipv6_addr_scope(&a6->sin6_addr) ==
+ IPV6_ADDR_SCOPE_LINKLOCAL &&
+ a6->sin6_scope_id != b6->sin6_scope_id)
+ return false;
+
+ if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+ a6->sin6_port == b6->sin6_port)
+ return true;
+ break;
+
+ default:
+ dprintk("%s: unhandled address family: %u\n",
+ __func__, addr1->sa_family);
+ return false;
+ }
- dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
- ntohl(ip_addr), ntohs(port));
+ return false;
+}
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
- if (ds->ds_ip_addr == ip_addr &&
- ds->ds_port == port) {
- return ds;
+/*
+ * Lookup DS by addresses. The first matching address returns true.
+ * nfs4_ds_cache_lock is held
+ */
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(struct list_head *dsaddrs)
+{
+ struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+
+ list_for_each_entry(da1, dsaddrs, da_node) {
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+ list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+ if (same_sockaddr(
+ (struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr))
+ return ds;
+ }
}
}
return NULL;
}
/*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+ struct list_head *dsaddrs2)
+{
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+ size_t count1 = 0,
+ count2 = 0;
+
+ list_for_each_entry(da1, dsaddrs1, da_node)
+ count1++;
+
+ list_for_each_entry(da2, dsaddrs2, da_node) {
+ bool found = false;
+ count2++;
+ list_for_each_entry(da1, dsaddrs1, da_node) {
+ if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return false;
+ }
+
+ return (count1 == count2);
+}
+
+/*
* Create an rpc connection to the nfs4_pnfs_ds data server
- * Currently only support IPv4
+ * Currently only supports IPv4 and IPv6 addresses
*/
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
- struct nfs_client *clp;
- struct sockaddr_in sin;
+ struct nfs_client *clp = ERR_PTR(-EIO);
+ struct nfs4_pnfs_ds_addr *da;
int status = 0;
- dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = ds->ds_ip_addr;
- sin.sin_port = ds->ds_port;
+ BUG_ON(list_empty(&ds->ds_addrs));
+
+ list_for_each_entry(da, &ds->ds_addrs, da_node) {
+ dprintk("%s: DS %s: trying address %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);
+
+ clp = nfs4_set_ds_client(mds_srv->nfs_client,
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP);
+ if (!IS_ERR(clp))
+ break;
+ }
- clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
- sizeof(sin), IPPROTO_TCP);
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
}
ds->ds_clp = clp;
- dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s [existing] server=%s\n", __func__,
+ ds->ds_remotestr);
goto out;
}
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
ds->ds_clp = clp;
- dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
- ntohs(ds->ds_port));
+ dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
out_put:
@@ -147,12 +231,25 @@ out_put:
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
+ struct nfs4_pnfs_ds_addr *da;
+
dprintk("--> %s\n", __func__);
ifdebug(FACILITY)
print_ds(ds);
if (ds->ds_clp)
nfs_put_client(ds->ds_clp);
+
+ while (!list_empty(&ds->ds_addrs)) {
+ da = list_first_entry(&ds->ds_addrs,
+ struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
+ }
+
+ kfree(ds->ds_remotestr);
kfree(ds);
}
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
kfree(dsaddr);
}
+/*
+ * Create a string with a human readable address and port to avoid
+ * complicated setup around many dprinks.
+ */
+static char *
+nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
+{
+ struct nfs4_pnfs_ds_addr *da;
+ char *remotestr;
+ size_t len;
+ char *p;
+
+ len = 3; /* '{', '}' and eol */
+ list_for_each_entry(da, dsaddrs, da_node) {
+ len += strlen(da->da_remotestr) + 1; /* string plus comma */
+ }
+
+ remotestr = kzalloc(len, gfp_flags);
+ if (!remotestr)
+ return NULL;
+
+ p = remotestr;
+ *(p++) = '{';
+ len--;
+ list_for_each_entry(da, dsaddrs, da_node) {
+ size_t ll = strlen(da->da_remotestr);
+
+ if (ll > len)
+ goto out_err;
+
+ memcpy(p, da->da_remotestr, ll);
+ p += ll;
+ len -= ll;
+
+ if (len < 1)
+ goto out_err;
+ (*p++) = ',';
+ len--;
+ }
+ if (len < 2)
+ goto out_err;
+ *(p++) = '}';
+ *p = '\0';
+ return remotestr;
+out_err:
+ kfree(remotestr);
+ return NULL;
+}
+
static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *tmp_ds, *ds;
+ struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
+ char *remotestr;
- ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
+ if (list_empty(dsaddrs)) {
+ dprintk("%s: no addresses defined\n", __func__);
+ goto out;
+ }
+
+ ds = kzalloc(sizeof(*ds), gfp_flags);
if (!ds)
goto out;
+ /* this is only used for debugging, so it's ok if its NULL */
+ remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
+
spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(ip_addr, port);
+ tmp_ds = _data_server_lookup_locked(dsaddrs);
if (tmp_ds == NULL) {
- ds->ds_ip_addr = ip_addr;
- ds->ds_port = port;
+ INIT_LIST_HEAD(&ds->ds_addrs);
+ list_splice_init(dsaddrs, &ds->ds_addrs);
+ ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
- dprintk("%s add new data server ip 0x%x\n", __func__,
- ds->ds_ip_addr);
+ dprintk("%s add new data server %s\n", __func__,
+ ds->ds_remotestr);
} else {
+ if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
+ dsaddrs)) {
+ dprintk("%s: multipath address mismatch: %s != %s",
+ __func__, tmp_ds->ds_remotestr, remotestr);
+ }
+ kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
- dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
- __func__, tmp_ds->ds_ip_addr,
+ dprintk("%s data server %s found, inc'ed ds_count to %d\n",
+ __func__, ds->ds_remotestr,
atomic_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
@@ -213,18 +375,22 @@ out:
}
/*
- * Currently only support ipv4, and one multi-path address.
+ * Currently only supports ipv4, ipv6 and one multi-path address.
*/
-static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
+static struct nfs4_pnfs_ds_addr *
+decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *ds = NULL;
- char *buf;
- const char *ipend, *pstr;
- u32 ip_addr, port;
- int nlen, rlen, i;
+ struct nfs4_pnfs_ds_addr *da = NULL;
+ char *buf, *portstr;
+ u32 port;
+ int nlen, rlen;
int tmp[2];
__be32 *p;
+ char *netid, *match_netid;
+ size_t len, match_netid_len;
+ char *startsep = "";
+ char *endsep = "";
+
/* r_netid */
p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
if (unlikely(!p))
goto out_err;
- /* Check that netid is "tcp" */
- if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
- dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+ netid = kmalloc(nlen+1, gfp_flags);
+ if (unlikely(!netid))
goto out_err;
- }
- /* r_addr */
+ netid[nlen] = '\0';
+ memcpy(netid, p, nlen);
+
+ /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(streamp, 4);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;
rlen = be32_to_cpup(p);
p = xdr_inline_decode(streamp, rlen);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;
- /* ipv6 length plus port is legal */
- if (rlen > INET6_ADDRSTRLEN + 8) {
+ /* port is ".ABC.DEF", 8 chars max */
+ if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
- goto out_err;
+ goto out_free_netid;
}
buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
- goto out_err;
+ goto out_free_netid;
}
buf[rlen] = '\0';
memcpy(buf, p, rlen);
- /* replace the port dots with dashes for the in4_pton() delimiter*/
- for (i = 0; i < 2; i++) {
- char *res = strrchr(buf, '.');
- if (!res) {
- dprintk("%s: Failed finding expected dots in port\n",
- __func__);
- goto out_free;
- }
- *res = '-';
+ /* replace port '.' with '-' */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot in port\n",
+ __func__);
+ goto out_free_buf;
+ }
+ *portstr = '-';
+
+ /* find '.' between address and port */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot between address and "
+ "port\n", __func__);
+ goto out_free_buf;
}
+ *portstr = '\0';
- /* Currently only support ipv4 address */
- if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
- dprintk("%s: Only ipv4 addresses supported\n", __func__);
- goto out_free;
+ da = kzalloc(sizeof(*da), gfp_flags);
+ if (unlikely(!da))
+ goto out_free_buf;
+
+ INIT_LIST_HEAD(&da->da_node);
+
+ if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+ sizeof(da->da_addr))) {
+ dprintk("%s: error parsing address %s\n", __func__, buf);
+ goto out_free_da;
}
- /* port */
- pstr = ipend;
- sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
+ portstr++;
+ sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1]));
- ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
- dprintk("%s: Decoded address and port %s\n", __func__, buf);
-out_free:
+ switch (da->da_addr.ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
+ da->da_addrlen = sizeof(struct sockaddr_in);
+ match_netid = "tcp";
+ match_netid_len = 3;
+ break;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
+ da->da_addrlen = sizeof(struct sockaddr_in6);
+ match_netid = "tcp6";
+ match_netid_len = 4;
+ startsep = "[";
+ endsep = "]";
+ break;
+
+ default:
+ dprintk("%s: unsupported address family: %u\n",
+ __func__, da->da_addr.ss_family);
+ goto out_free_da;
+ }
+
+ if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
+ dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
+ __func__, netid, match_netid);
+ goto out_free_da;
+ }
+
+ /* save human readable address */
+ len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
+ da->da_remotestr = kzalloc(len, gfp_flags);
+
+ /* NULL is ok, only used for dprintk */
+ if (da->da_remotestr)
+ snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
+ buf, endsep, ntohs(port));
+
+ dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
kfree(buf);
+ kfree(netid);
+ return da;
+
+out_free_da:
+ kfree(da);
+out_free_buf:
+ dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
+ kfree(buf);
+out_free_netid:
+ kfree(netid);
out_err:
- return ds;
+ return NULL;
}
/* Decode opaque device data and return the result */
@@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
struct xdr_stream stream;
struct xdr_buf buf;
struct page *scratch;
+ struct list_head dsaddrs;
+ struct nfs4_pnfs_ds_addr *da;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
NFS_SERVER(ino)->nfs_client,
&pdev->dev_id);
+ INIT_LIST_HEAD(&dsaddrs);
+
for (i = 0; i < dsaddr->ds_num; i++) {
int j;
u32 mp_count;
@@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
goto out_err_free_deviceid;
mp_count = be32_to_cpup(p); /* multipath count */
- if (mp_count > 1) {
- printk(KERN_WARNING
- "%s: Multipath count %d not supported, "
- "skipping all greater than 1\n", __func__,
- mp_count);
- }
for (j = 0; j < mp_count; j++) {
- if (j == 0) {
- dsaddr->ds_list[i] = decode_and_add_ds(&stream,
- ino, gfp_flags);
- if (dsaddr->ds_list[i] == NULL)
- goto out_err_free_deviceid;
- } else {
- u32 len;
- /* skip extra multipath */
-
- /* read len, skip */
- p = xdr_inline_decode(&stream, 4);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- len = be32_to_cpup(p);
-
- p = xdr_inline_decode(&stream, len);
- if (unlikely(!p))
- goto out_err_free_deviceid;
-
- /* read len, skip */
- p = xdr_inline_decode(&stream, 4);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- len = be32_to_cpup(p);
-
- p = xdr_inline_decode(&stream, len);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- }
+ da = decode_ds_addr(&stream, gfp_flags);
+ if (da)
+ list_add_tail(&da->da_node, &dsaddrs);
+ }
+ if (list_empty(&dsaddrs)) {
+ dprintk("%s: no suitable DS addresses found\n",
+ __func__);
+ goto out_err_free_deviceid;
+ }
+
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ if (!dsaddr->ds_list[i])
+ goto out_err_drain_dsaddrs;
+
+ /* If DS was already in cache, free ds addrs */
+ while (!list_empty(&dsaddrs)) {
+ da = list_first_entry(&dsaddrs,
+ struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
}
}
__free_page(scratch);
return dsaddr;
+out_err_drain_dsaddrs:
+ while (!list_empty(&dsaddrs)) {
+ da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
+ }
out_err_free_deviceid:
nfs4_fl_free_deviceid(dsaddr);
/* stripe_indicies was part of dsaddr */
@@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
- int err, u32 ds_addr)
+ int err, const char *ds_remotestr)
{
u32 *p = (u32 *)&dsaddr->id_node.deviceid;
- printk(KERN_ERR "NFS: data server %x connection error %d."
+ printk(KERN_ERR "NFS: data server %s connection error %d."
" Deviceid [%x%x%x%x] marked out of use.\n",
- ds_addr, err, p[0], p[1], p[2], p[3]);
+ ds_remotestr, err, p[0], p[1], p[2], p[3]);
spin_lock(&nfs4_ds_cache_lock);
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
err = nfs4_ds_connect(s, ds);
if (err) {
filelayout_mark_devid_negative(dsaddr, err,
- ntohl(ds->ds_ip_addr));
+ ds->ds_remotestr);
return NULL;
}
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d2c4b59c896d..e7f043a6db11 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4768,6 +4768,16 @@ out_inval:
return -NFS4ERR_INVAL;
}
+static bool
+nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+{
+ if (a->server_scope_sz == b->server_scope_sz &&
+ memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
+ return true;
+
+ return false;
+}
+
/*
* nfs4_proc_exchange_id()
*
@@ -4810,9 +4820,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
init_utsname()->domainname,
clp->cl_rpcclient->cl_auth->au_flavor);
+ res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
+ if (unlikely(!res.server_scope))
+ return -ENOMEM;
+
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (!status)
status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+
+ if (!status) {
+ if (clp->server_scope &&
+ !nfs41_same_server_scope(clp->server_scope,
+ res.server_scope)) {
+ dprintk("%s: server_scope mismatch detected\n",
+ __func__);
+ set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
+ kfree(clp->server_scope);
+ clp->server_scope = NULL;
+ }
+
+ if (!clp->server_scope)
+ clp->server_scope = res.server_scope;
+ else
+ kfree(res.server_scope);
+ }
+
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e97dd219f84f..5d744a52b4e1 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp)
goto out_error;
}
clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
- set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+
+ if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
+ &clp->cl_state))
+ nfs4_state_start_reclaim_nograce(clp);
+ else
+ set_bit(NFS4CLNT_RECLAIM_REBOOT,
+ &clp->cl_state);
+
pnfs_destroy_all_layouts(clp);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d869a5e5464b..4aa00918f8a5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4977,11 +4977,17 @@ static int decode_exchange_id(struct xdr_stream *xdr,
if (unlikely(status))
return status;
- /* Throw away server_scope */
+ /* Save server_scope */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+
+ memcpy(res->server_scope->server_scope, dummy_str, dummy);
+ res->server_scope->server_scope_sz = dummy;
+
/* Throw away Implementation id array */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8c1309d852a6..b918dd1a66c1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -448,11 +448,17 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
LIST_HEAD(tmp_list);
spin_lock(&clp->cl_lock);
- list_splice_init(&clp->cl_layouts, &tmp_list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!list_empty(&server->layouts))
+ list_splice_init(&server->layouts, &tmp_list);
+ }
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
while (!list_empty(&tmp_list)) {
@@ -915,7 +921,8 @@ pnfs_update_layout(struct inode *ino,
};
unsigned pg_offset;
struct nfs_inode *nfsi = NFS_I(ino);
- struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs_client *clp = server->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
bool first = false;
@@ -959,7 +966,7 @@ pnfs_update_layout(struct inode *ino,
*/
spin_lock(&clp->cl_lock);
BUG_ON(!list_empty(&lo->plh_layouts));
- list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+ list_add_tail(&lo->plh_layouts, &server->layouts);
spin_unlock(&clp->cl_lock);
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e4d062..96bf4e6f45be 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *);
/* pnfs_dev.c */
struct nfs4_deviceid_node {
struct hlist_node node;
+ struct hlist_node tmpnode;
const struct pnfs_layoutdriver_type *ld;
const struct nfs_client *nfs_client;
struct nfs4_deviceid deviceid;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index c65e133ce9c0..503186e2e9de 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
const struct nfs4_deviceid *id)
{
INIT_HLIST_NODE(&d->node);
+ INIT_HLIST_NODE(&d->tmpnode);
d->ld = ld;
d->nfs_client = nfs_client;
d->deviceid = *id;
@@ -238,24 +239,28 @@ static void
_deviceid_purge_client(const struct nfs_client *clp, long hash)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n, *next;
+ struct hlist_node *n;
HLIST_HEAD(tmp);
+ spin_lock(&nfs4_deviceid_lock);
rcu_read_lock();
hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
if (d->nfs_client == clp && atomic_read(&d->ref)) {
hlist_del_init_rcu(&d->node);
- hlist_add_head(&d->node, &tmp);
+ hlist_add_head(&d->tmpnode, &tmp);
}
rcu_read_unlock();
+ spin_unlock(&nfs4_deviceid_lock);
if (hlist_empty(&tmp))
return;
synchronize_rcu();
- hlist_for_each_entry_safe(d, n, next, &tmp, node)
+ while (!hlist_empty(&tmp)) {
if (atomic_dec_and_test(&d->ref))
d->ld->free_deviceid_node(d);
+ hlist_del_init(&d->tmpnode);
+ }
}
void
@@ -263,8 +268,6 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
{
long h;
- spin_lock(&nfs4_deviceid_lock);
for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
_deviceid_purge_client(clp, h);
- spin_unlock(&nfs4_deviceid_lock);
}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 7eafe468a29c..b2e3ff347620 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1346,6 +1346,11 @@ static void nilfs_btree_shrink(struct nilfs_bmap *btree,
path[level].bp_bh = NULL;
}
+static void nilfs_btree_nop(struct nilfs_bmap *btree,
+ struct nilfs_btree_path *path,
+ int level, __u64 *keyp, __u64 *ptrp)
+{
+}
static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
struct nilfs_btree_path *path,
@@ -1356,20 +1361,19 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
struct buffer_head *bh;
struct nilfs_btree_node *node, *parent, *sib;
__u64 sibptr;
- int pindex, level, ncmin, ncmax, ncblk, ret;
+ int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
ret = 0;
stats->bs_nblocks = 0;
ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
ncblk = nilfs_btree_nchildren_per_block(btree);
- for (level = NILFS_BTREE_LEVEL_NODE_MIN;
+ for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
level < nilfs_btree_height(btree) - 1;
level++) {
node = nilfs_btree_get_nonroot_node(path, level);
path[level].bp_oldreq.bpr_ptr =
- nilfs_btree_node_get_ptr(node, path[level].bp_index,
- ncblk);
+ nilfs_btree_node_get_ptr(node, dindex, ncblk);
ret = nilfs_bmap_prepare_end_ptr(btree,
&path[level].bp_oldreq, dat);
if (ret < 0)
@@ -1383,6 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
pindex = path[level + 1].bp_index;
+ dindex = pindex;
if (pindex > 0) {
/* left sibling */
@@ -1421,6 +1426,14 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_concat_right;
stats->bs_nblocks++;
+ /*
+ * When merging right sibling node
+ * into the current node, pointer to
+ * the right sibling node must be
+ * terminated instead. The adjustment
+ * below is required for that.
+ */
+ dindex = pindex + 1;
/* continue; */
}
} else {
@@ -1431,29 +1444,31 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
NILFS_BTREE_ROOT_NCHILDREN_MAX) {
path[level].bp_op = nilfs_btree_shrink;
stats->bs_nblocks += 2;
+ level++;
+ path[level].bp_op = nilfs_btree_nop;
+ goto shrink_root_child;
} else {
path[level].bp_op = nilfs_btree_do_delete;
stats->bs_nblocks++;
+ goto out;
}
-
- goto out;
-
}
}
+ /* child of the root node is deleted */
+ path[level].bp_op = nilfs_btree_do_delete;
+ stats->bs_nblocks++;
+
+shrink_root_child:
node = nilfs_btree_get_root(btree);
path[level].bp_oldreq.bpr_ptr =
- nilfs_btree_node_get_ptr(node, path[level].bp_index,
+ nilfs_btree_node_get_ptr(node, dindex,
NILFS_BTREE_ROOT_NCHILDREN_MAX);
ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
if (ret < 0)
goto err_out_child_node;
- /* child of the root node is deleted */
- path[level].bp_op = nilfs_btree_do_delete;
- stats->bs_nblocks++;
-
/* success */
out:
*levelp = level;